/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN	"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY	"amdgpu/beige_goby_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

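/**
 * amdgpu_vcn_sw_init - VCN software initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VCN locks, counters and idle work, select and load the
 * firmware image for the current ASIC, parse its version header, and
 * allocate the per-instance VCPU, firmware-shared and (when indirect
 * SRAM is used) DPG scratch buffer objects.
 */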
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_RENOIR:
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_ALDEBARAN:
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_SIENNA_CICHLID:
		fw_name = FIRMWARE_SIENNA_CICHLID;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVY_FLOUNDER:
		fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_VANGOGH:
		fw_name = FIRMWARE_VANGOGH;
		break;
	case CHIP_DIMGREY_CAVEFISH:
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_BEIGE_GOBY:
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bit 20-23, it is encode major and non-zero for new naming convention.
	 * This field is part of version minor and DRM_DISABLED_FLAG in old naming
	 * convention. Since the latest version minor is 0x5B and DRM_DISABLED_FLAG
	 * is zero in old naming convention, this field is always zero so far.
	 * These four bits are used to tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
						AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
						&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

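/**
 * amdgpu_vcn_sw_fini - VCN software teardown
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and free the per-instance buffer objects, rings,
 * firmware image and locks set up by amdgpu_vcn_sw_init().
 */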
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
						  (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
					  &adev->vcn.inst[j].gpu_addr,
					  (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

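/**
 * amdgpu_vcn_suspend - save VCN state before suspend
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and save the contents of each instance's VCPU
 * buffer object to system memory so it can be restored on resume.
 */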
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
	}
	return 0;
}

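/**
 * amdgpu_vcn_resume - restore VCN state after resume
 *
 * @adev: amdgpu_device pointer
 *
 * Restore each instance's VCPU buffer object from the copy saved at
 * suspend time, or re-upload the firmware image and clear the remainder
 * of the buffer if no saved copy exists.
 */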
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

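/*
 * Delayed idle work: count the fences still outstanding on the decode and
 * encode rings of every VCN instance and update the DPG pause state.  Once
 * everything is idle, re-enable GFXOFF, power gate the VCN block and drop
 * the video power profile; otherwise reschedule the work.
 */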
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
				unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_gfx_off_ctrl(adev, true);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
		       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

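/**
 * amdgpu_vcn_ring_begin_use - power up VCN before a ring submission
 *
 * @ring: ring about to be used
 *
 * Bump the submission counters and cancel any pending idle work; if none
 * was pending, disable GFXOFF and switch to the video power profile.
 * Then ungate the VCN block and update the DPG pause state for the
 * ring's instance under the power-gating lock.
 */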
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		amdgpu_gfx_off_ctrl(adev, false);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
	       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

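/**
 * amdgpu_vcn_ring_end_use - mark the end of a ring submission
 *
 * @ring: ring that was used
 *
 * Drop the submission counters taken in amdgpu_vcn_ring_begin_use() and
 * (re)schedule the idle work that will eventually power gate the block.
 */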
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

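/**
 * amdgpu_vcn_dec_ring_test_ring - register write/read test on the decode ring
 *
 * @ring: ring to test
 *
 * Write a known value to the scratch register through the ring and poll
 * the register until it reads back, to verify the decode ring is alive.
 * Skipped under SR-IOV, where direct register access is not available.
 */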
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

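/**
 * amdgpu_vcn_dec_sw_ring_test_ring - basic test of the software decode ring
 *
 * @ring: ring to test
 *
 * Submit an END command and wait for the read pointer to advance to
 * verify the software decode ring is being processed. Skipped under SR-IOV.
 */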
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

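/*
 * Build a small decode IB that points the firmware at the message held in
 * @bo, submit it directly to @ring and optionally return the fence.  The
 * message buffer is fenced against the submission and then released.
 */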
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	void *msg = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	msg = amdgpu_bo_kptr(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
	return r;
}

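/*
 * Allocate a reserved, mapped message buffer and fill it with a decoder
 * create message for the given session handle.
 */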
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

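/*
 * Allocate a reserved, mapped message buffer and fill it with a decoder
 * destroy message for the given session handle.
 */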
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

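/**
 * amdgpu_vcn_dec_ring_test_ib - indirect buffer test on the decode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Send a decoder create message followed by a destroy message and wait
 * for the resulting fence to signal.
 */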
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

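/*
 * Software-ring variant of amdgpu_vcn_dec_send_msg(): wrap the message in
 * @bo in a decode buffer structure, submit it directly to @ring and
 * optionally return the fence.
 */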
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

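/**
 * amdgpu_vcn_dec_sw_ring_test_ib - indirect buffer test on the software decode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Send a decoder create and destroy message through the software decode
 * ring and wait for the resulting fence to signal.
 */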
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

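/**
 * amdgpu_vcn_enc_ring_test_ring - basic test of the encode ring
 *
 * @ring: ring to test
 *
 * Submit an END command and wait for the read pointer to advance to
 * verify the encode ring is being processed. Skipped under SR-IOV.
 */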
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

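/*
 * Build and directly submit an encoder "create session" IB for the given
 * handle, passing the address of @bo as the session buffer, and optionally
 * return the fence.
 */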
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

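/*
 * Build and directly submit an encoder "close session" IB for the given
 * handle and optionally return the fence.
 */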
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

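/**
 * amdgpu_vcn_enc_ring_test_ib - indirect buffer test on the encode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Create and destroy an encoder session and wait for the resulting fence
 * to signal.
 */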
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);

	return r;
}
915