1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include "amdgpu.h"
25 #include "amdgpu_jpeg.h"
26 #include "soc15.h"
27 #include "soc15d.h"
28 #include "jpeg_v4_0_3.h"
29 
30 #include "vcn/vcn_4_0_3_offset.h"
31 #include "vcn/vcn_4_0_3_sh_mask.h"
32 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
33 
/*
 * Values compared against the UVDJ_PWR_STATUS field of UVD_PGFSM_STATUS
 * when waiting for a power transition of a JPEG instance.
 */
enum jpeg_engin_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,	/* waited for in jpeg_v4_0_3_start() */
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,	/* waited for in jpeg_v4_0_3_stop() */
};
38 
/* Forward declarations: these helpers are called before their definitions below. */
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(void *handle,
				enum amd_powergating_state state);
43 
44 static int amdgpu_ih_srcid_jpeg[] = {
45 	VCN_4_0__SRCID__JPEG_DECODE,
46 	VCN_4_0__SRCID__JPEG1_DECODE,
47 	VCN_4_0__SRCID__JPEG2_DECODE,
48 	VCN_4_0__SRCID__JPEG3_DECODE,
49 	VCN_4_0__SRCID__JPEG4_DECODE,
50 	VCN_4_0__SRCID__JPEG5_DECODE,
51 	VCN_4_0__SRCID__JPEG6_DECODE,
52 	VCN_4_0__SRCID__JPEG7_DECODE
53 };
54 
55 /**
56  * jpeg_v4_0_3_early_init - set function pointers
57  *
58  * @handle: amdgpu_device pointer
59  *
60  * Set ring and irq function pointers
61  */
62 static int jpeg_v4_0_3_early_init(void *handle)
63 {
64 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
65 
66 	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
67 
68 	jpeg_v4_0_3_set_dec_ring_funcs(adev);
69 	jpeg_v4_0_3_set_irq_funcs(adev);
70 
71 	return 0;
72 }
73 
74 /**
75  * jpeg_v4_0_3_sw_init - sw init for JPEG block
76  *
77  * @handle: amdgpu_device pointer
78  *
79  * Load firmware and sw initialization
80  */
81 static int jpeg_v4_0_3_sw_init(void *handle)
82 {
83 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
84 	struct amdgpu_ring *ring;
85 	int i, j, r;
86 
87 	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
88 		/* JPEG TRAP */
89 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
90 				amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
91 		if (r)
92 			return r;
93 	}
94 
95 	r = amdgpu_jpeg_sw_init(adev);
96 	if (r)
97 		return r;
98 
99 	r = amdgpu_jpeg_resume(adev);
100 	if (r)
101 		return r;
102 
103 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
104 		if (adev->jpeg.harvest_config & (1 << i))
105 			continue;
106 		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
107 			ring = &adev->jpeg.inst[i].ring_dec[j];
108 			ring->use_doorbell = true;
109 			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
110 			ring->doorbell_index =
111 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * i;
112 			sprintf(ring->name, "jpeg_dec_%d.%d", i, j);
113 			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
114 						AMDGPU_RING_PRIO_DEFAULT, NULL);
115 			if (r)
116 				return r;
117 
118 			adev->jpeg.internal.jpeg_pitch[j] =
119 				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
120 			adev->jpeg.inst[i].external.jpeg_pitch[j] =
121 				SOC15_REG_OFFSET1(JPEG, i, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
122 				(j?(0x40 * j - 0xc80):0));
123 		}
124 	}
125 
126 	return 0;
127 }
128 
/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Suspends the JPEG block and, if that succeeded, releases the software
 * state through the common JPEG teardown.
 *
 * Return: error from the suspend step, otherwise the teardown result.
 */
static int jpeg_v4_0_3_sw_fini(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err = amdgpu_jpeg_suspend(adev);

	if (err)
		return err;

	return amdgpu_jpeg_sw_fini(adev);
}
149 
150 /**
151  * jpeg_v4_0_3_hw_init - start and test JPEG block
152  *
153  * @handle: amdgpu_device pointer
154  *
155  */
156 static int jpeg_v4_0_3_hw_init(void *handle)
157 {
158 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
159 	struct amdgpu_ring *ring;
160 	int i, j, r;
161 
162 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
163 		if (adev->jpeg.harvest_config & (1 << i))
164 			continue;
165 		ring = adev->jpeg.inst[i].ring_dec;
166 
167 		if (ring->use_doorbell)
168 			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
169 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i,
170 				adev->jpeg.inst[i].aid_id);
171 
172 		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
173 			ring = &adev->jpeg.inst[i].ring_dec[j];
174 			if (ring->use_doorbell)
175 				WREG32_SOC15_OFFSET(VCN, i, regVCN_JPEG_DB_CTRL,
176 					(ring->pipe?(ring->pipe - 0x15):0),
177 					ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
178 					VCN_JPEG_DB_CTRL__EN_MASK);
179 			r = amdgpu_ring_test_helper(ring);
180 			if (r)
181 				return r;
182 		}
183 	}
184 	DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
185 
186 	return 0;
187 }
188 
189 /**
190  * jpeg_v4_0_3_hw_fini - stop the hardware block
191  *
192  * @handle: amdgpu_device pointer
193  *
194  * Stop the JPEG block, mark ring as not ready any more
195  */
196 static int jpeg_v4_0_3_hw_fini(void *handle)
197 {
198 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
199 	int ret = 0;
200 
201 	cancel_delayed_work_sync(&adev->jpeg.idle_work);
202 
203 	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
204 		ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
205 
206 	return ret;
207 }
208 
/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs the hardware teardown first and, only if it succeeded, the common
 * JPEG suspend.
 *
 * Return: error from the hardware teardown, otherwise the suspend result.
 */
static int jpeg_v4_0_3_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err = jpeg_v4_0_3_hw_fini(adev);

	if (err)
		return err;

	return amdgpu_jpeg_suspend(adev);
}
229 
/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Runs the common JPEG resume and, only if it succeeded, the hardware init.
 *
 * Return: error from the common resume, otherwise the hardware init result.
 */
static int jpeg_v4_0_3_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err = amdgpu_jpeg_resume(adev);

	if (err)
		return err;

	return jpeg_v4_0_3_hw_init(adev);
}
250 
251 static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
252 {
253 	uint32_t data;
254 	int i;
255 
256 	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL);
257 	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
258 		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
259 		data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
260 	} else {
261 		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
262 	}
263 
264 	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
265 	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
266 	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data);
267 
268 	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE);
269 	data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
270 	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
271 		data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
272 	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data);
273 }
274 
275 static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
276 {
277 	uint32_t data;
278 	int i;
279 
280 	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL);
281 	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
282 		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
283 		data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
284 	} else {
285 		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
286 	}
287 
288 	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
289 	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
290 	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data);
291 
292 	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE);
293 	data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
294 	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
295 		data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
296 	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data);
297 }
298 
/**
 * jpeg_v4_0_3_start - start JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * For every non-harvested instance: requests power-up, disables clock
 * gating, programs the address config registers, releases the JMI soft
 * reset and sets up the ring buffer registers of each decode ring.
 *
 * Return: always 0.
 */
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		/* request power-up and wait for the PGFSM to report PWR_ON */
		WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG,
			1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		/*
		 * NOTE(review): the result of the wait is discarded, so a
		 * timeout is not reported to the caller - confirm intended.
		 */
		SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);

		/* disable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), 0,
			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		/* JPEG disable CGC */
		jpeg_v4_0_3_disable_clock_gating(adev, i);

		/* MJPEG global tiling registers */
		WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX8_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);
		WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);

		/* enable JMI channel */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0,
			~UVD_JMI_CNTL__SOFT_RESET_MASK);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			/* register offset of ring j relative to the ring 0 registers */
			unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);

			ring = &adev->jpeg.inst[i].ring_dec[j];

			/* enable System Interrupt for JRBC */
			WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN),
				JPEG_SYS_INT_EN__DJRBC0_MASK << j,
				~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));

			/* ring buffer setup: VMID 0, base address, zeroed pointers, size */
			WREG32_SOC15_OFFSET(JPEG, i,
				regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset,
				(0x00000001L | 0x00000002L));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
				reg_offset, lower_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
				reg_offset, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0);
			/* RB_CNTL: bit 0 is dropped again once the pointers are reset */
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset,
				0x00000002L);
			/* ring size is programmed in dwords */
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset,
				ring->ring_size / 4);
			/* resync the software write pointer with the hardware value */
			ring->wptr = RREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				reg_offset);
		}
	}

	return 0;
}
369 
/**
 * jpeg_v4_0_3_stop - stop JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * For every non-harvested instance: puts the JMI into soft reset, enables
 * clock gating, re-enables the anti hang mechanism and requests power-down.
 *
 * Return: always 0.
 */
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;

		/* reset JMI */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL),
			UVD_JMI_CNTL__SOFT_RESET_MASK,
			~UVD_JMI_CNTL__SOFT_RESET_MASK);

		jpeg_v4_0_3_enable_clock_gating(adev, i);

		/* enable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS),
			UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		/* request power-down and wait for the PGFSM to report PWR_OFF */
		WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG,
			2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		/*
		 * NOTE(review): the result of the wait is discarded, so a
		 * timeout is not reported to the caller - confirm intended.
		 */
		SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_OFF <<
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
	}

	return 0;
}
407 
408 /**
409  * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
410  *
411  * @ring: amdgpu_ring pointer
412  *
413  * Returns the current hardware read pointer
414  */
415 static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
416 {
417 	struct amdgpu_device *adev = ring->adev;
418 
419 	return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR,
420 			ring->pipe?(0x40 * ring->pipe - 0xc80):0);
421 }
422 
423 /**
424  * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
425  *
426  * @ring: amdgpu_ring pointer
427  *
428  * Returns the current hardware write pointer
429  */
430 static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
431 {
432 	struct amdgpu_device *adev = ring->adev;
433 
434 	if (ring->use_doorbell)
435 		return adev->wb.wb[ring->wptr_offs];
436 	else
437 		return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
438 			ring->pipe?(0x40 * ring->pipe - 0xc80):0);
439 }
440 
441 /**
442  * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
443  *
444  * @ring: amdgpu_ring pointer
445  *
446  * Commits the write pointer to the hardware
447  */
448 static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
449 {
450 	struct amdgpu_device *adev = ring->adev;
451 
452 	if (ring->use_doorbell) {
453 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
454 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
455 	} else {
456 		WREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
457 			(ring->pipe?(0x40 * ring->pipe - 0xc80):0), lower_32_bits(ring->wptr));
458 	}
459 }
460 
461 /**
462  * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
463  *
464  * @ring: amdgpu_ring pointer
465  *
466  * Write a start command to the ring.
467  */
468 static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
469 {
470 	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
471 		0, 0, PACKETJ_TYPE0));
472 	amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
473 
474 	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
475 		0, 0, PACKETJ_TYPE0));
476 	amdgpu_ring_write(ring, 0x80004000);
477 }
478 
479 /**
480  * jpeg_v4_0_3_dec_ring_insert_end - insert a end command
481  *
482  * @ring: amdgpu_ring pointer
483  *
484  * Write a end command to the ring.
485  */
486 static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
487 {
488 	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
489 		0, 0, PACKETJ_TYPE0));
490 	amdgpu_ring_write(ring, 0x62a04);
491 
492 	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
493 		0, 0, PACKETJ_TYPE0));
494 	amdgpu_ring_write(ring, 0x00004000);
495 }
496 
497 /**
498  * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command
499  *
500  * @ring: amdgpu_ring pointer
501  * @addr: address
502  * @seq: sequence number
503  * @flags: fence related flags
504  *
505  * Write a fence and a trap command to the ring.
506  */
507 static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
508 				unsigned int flags)
509 {
510 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
511 
512 	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
513 		0, 0, PACKETJ_TYPE0));
514 	amdgpu_ring_write(ring, seq);
515 
516 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
517 		0, 0, PACKETJ_TYPE0));
518 	amdgpu_ring_write(ring, seq);
519 
520 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
521 		0, 0, PACKETJ_TYPE0));
522 	amdgpu_ring_write(ring, lower_32_bits(addr));
523 
524 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
525 		0, 0, PACKETJ_TYPE0));
526 	amdgpu_ring_write(ring, upper_32_bits(addr));
527 
528 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
529 		0, 0, PACKETJ_TYPE0));
530 	amdgpu_ring_write(ring, 0x8);
531 
532 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
533 		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
534 	amdgpu_ring_write(ring, 0);
535 
536 	if (ring->adev->jpeg.inst[ring->me].aid_id) {
537 		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
538 			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
539 		amdgpu_ring_write(ring, 0x4);
540 	} else {
541 		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
542 		amdgpu_ring_write(ring, 0);
543 	}
544 
545 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
546 		0, 0, PACKETJ_TYPE0));
547 	amdgpu_ring_write(ring, 0x3fbc);
548 
549 	if (ring->adev->jpeg.inst[ring->me].aid_id) {
550 		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
551 			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
552 		amdgpu_ring_write(ring, 0x0);
553 	} else {
554 		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
555 		amdgpu_ring_write(ring, 0);
556 	}
557 
558 	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
559 		0, 0, PACKETJ_TYPE0));
560 	amdgpu_ring_write(ring, 0x1);
561 
562 	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
563 	amdgpu_ring_write(ring, 0);
564 }
565 
566 /**
567  * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
568  *
569  * @ring: amdgpu_ring pointer
570  * @job: job to retrieve vmid from
571  * @ib: indirect buffer to execute
572  * @flags: unused
573  *
574  * Write ring commands to execute the indirect buffer.
575  */
576 static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
577 				struct amdgpu_job *job,
578 				struct amdgpu_ib *ib,
579 				uint32_t flags)
580 {
581 	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
582 
583 	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
584 		0, 0, PACKETJ_TYPE0));
585 	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
586 
587 	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
588 		0, 0, PACKETJ_TYPE0));
589 	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
590 
591 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
592 		0, 0, PACKETJ_TYPE0));
593 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
594 
595 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
596 		0, 0, PACKETJ_TYPE0));
597 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
598 
599 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
600 		0, 0, PACKETJ_TYPE0));
601 	amdgpu_ring_write(ring, ib->length_dw);
602 
603 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
604 		0, 0, PACKETJ_TYPE0));
605 	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
606 
607 	amdgpu_ring_write(ring,	PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
608 		0, 0, PACKETJ_TYPE0));
609 	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
610 
611 	amdgpu_ring_write(ring,	PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
612 	amdgpu_ring_write(ring, 0);
613 
614 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
615 		0, 0, PACKETJ_TYPE0));
616 	amdgpu_ring_write(ring, 0x01400200);
617 
618 	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
619 		0, 0, PACKETJ_TYPE0));
620 	amdgpu_ring_write(ring, 0x2);
621 
622 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
623 		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
624 	amdgpu_ring_write(ring, 0x2);
625 }
626 
627 static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
628 				uint32_t val, uint32_t mask)
629 {
630 	uint32_t reg_offset = (reg << 2);
631 
632 	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
633 		0, 0, PACKETJ_TYPE0));
634 	amdgpu_ring_write(ring, 0x01400200);
635 
636 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
637 		0, 0, PACKETJ_TYPE0));
638 	amdgpu_ring_write(ring, val);
639 
640 	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
641 		0, 0, PACKETJ_TYPE0));
642 	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
643 		amdgpu_ring_write(ring, 0);
644 		amdgpu_ring_write(ring,
645 			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
646 	} else {
647 		amdgpu_ring_write(ring, reg_offset);
648 		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
649 			0, 0, PACKETJ_TYPE3));
650 	}
651 	amdgpu_ring_write(ring, mask);
652 }
653 
654 static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
655 				unsigned int vmid, uint64_t pd_addr)
656 {
657 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
658 	uint32_t data0, data1, mask;
659 
660 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
661 
662 	/* wait for register write */
663 	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
664 	data1 = lower_32_bits(pd_addr);
665 	mask = 0xffffffff;
666 	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
667 }
668 
669 static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
670 {
671 	uint32_t reg_offset = (reg << 2);
672 
673 	amdgpu_ring_write(ring,	PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
674 		0, 0, PACKETJ_TYPE0));
675 	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
676 		amdgpu_ring_write(ring, 0);
677 		amdgpu_ring_write(ring,
678 			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
679 	} else {
680 		amdgpu_ring_write(ring, reg_offset);
681 		amdgpu_ring_write(ring,	PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
682 			0, 0, PACKETJ_TYPE0));
683 	}
684 	amdgpu_ring_write(ring, val);
685 }
686 
687 static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
688 {
689 	int i;
690 
691 	WARN_ON(ring->wptr % 2 || count % 2);
692 
693 	for (i = 0; i < count / 2; i++) {
694 		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
695 		amdgpu_ring_write(ring, 0);
696 	}
697 }
698 
699 static bool jpeg_v4_0_3_is_idle(void *handle)
700 {
701 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
702 	bool ret;
703 	int i, j;
704 
705 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
706 		if (adev->jpeg.harvest_config & (1 << i))
707 			continue;
708 		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
709 			unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
710 
711 			ret &= ((RREG32_SOC15_OFFSET(JPEG, i,
712 					regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) &
713 					UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
714 					UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
715 		}
716 	}
717 
718 	return ret;
719 }
720 
721 static int jpeg_v4_0_3_wait_for_idle(void *handle)
722 {
723 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
724 	int ret;
725 	int i, j;
726 
727 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
728 		if (adev->jpeg.harvest_config & (1 << i))
729 			continue;
730 		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
731 			unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
732 
733 			ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, i,
734 				regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
735 				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
736 				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
737 		}
738 	}
739 	return ret;
740 }
741 
742 static int jpeg_v4_0_3_set_clockgating_state(void *handle,
743 					  enum amd_clockgating_state state)
744 {
745 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
746 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
747 	int i;
748 
749 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
750 		if (adev->jpeg.harvest_config & (1 << i))
751 			continue;
752 		if (enable) {
753 			if (!jpeg_v4_0_3_is_idle(handle))
754 				return -EBUSY;
755 			jpeg_v4_0_3_enable_clock_gating(adev, i);
756 		} else {
757 			jpeg_v4_0_3_disable_clock_gating(adev, i);
758 		}
759 	}
760 	return 0;
761 }
762 
763 static int jpeg_v4_0_3_set_powergating_state(void *handle,
764 					  enum amd_powergating_state state)
765 {
766 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
767 	int ret;
768 
769 	if (state == adev->jpeg.cur_state)
770 		return 0;
771 
772 	if (state == AMD_PG_STATE_GATE)
773 		ret = jpeg_v4_0_3_stop(adev);
774 	else
775 		ret = jpeg_v4_0_3_start(adev);
776 
777 	if (!ret)
778 		adev->jpeg.cur_state = state;
779 
780 	return ret;
781 }
782 
/*
 * Interrupt state callback of the JPEG irq source.  Nothing is programmed
 * here: every request is accepted and reported as successful.
 */
static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}
790 
791 static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
792 				      struct amdgpu_irq_src *source,
793 				      struct amdgpu_iv_entry *entry)
794 {
795 	uint32_t i;
796 
797 	i = node_id_to_phys_map[entry->node_id];
798 	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
799 
800 	switch (entry->src_id) {
801 	case VCN_4_0__SRCID__JPEG_DECODE:
802 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[0]);
803 		break;
804 	case VCN_4_0__SRCID__JPEG1_DECODE:
805 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[1]);
806 		break;
807 	case VCN_4_0__SRCID__JPEG2_DECODE:
808 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[2]);
809 		break;
810 	case VCN_4_0__SRCID__JPEG3_DECODE:
811 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[3]);
812 		break;
813 	case VCN_4_0__SRCID__JPEG4_DECODE:
814 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[4]);
815 		break;
816 	case VCN_4_0__SRCID__JPEG5_DECODE:
817 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[5]);
818 		break;
819 	case VCN_4_0__SRCID__JPEG6_DECODE:
820 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[6]);
821 		break;
822 	case VCN_4_0__SRCID__JPEG7_DECODE:
823 		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[7]);
824 		break;
825 	default:
826 		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
827 			  entry->src_id, entry->src_data[0]);
828 		break;
829 	}
830 
831 	return 0;
832 }
833 
/*
 * IP block callbacks of JPEG 4.0.3.  The late init and soft reset hooks
 * are not implemented and left NULL.
 */
static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
	.name = "jpeg_v4_0_3",
	.early_init = jpeg_v4_0_3_early_init,
	.late_init = NULL,
	.sw_init = jpeg_v4_0_3_sw_init,
	.sw_fini = jpeg_v4_0_3_sw_fini,
	.hw_init = jpeg_v4_0_3_hw_init,
	.hw_fini = jpeg_v4_0_3_hw_fini,
	.suspend = jpeg_v4_0_3_suspend,
	.resume = jpeg_v4_0_3_resume,
	.is_idle = jpeg_v4_0_3_is_idle,
	.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
	.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
};
853 
/*
 * Ring callbacks shared by all JPEG decode rings.  The dword counts below
 * have to match what the emit helpers in this file write:
 * jpeg_v4_0_3_dec_ring_emit_reg_wait() emits 8 dwords,
 * jpeg_v4_0_3_dec_ring_emit_fence() and jpeg_v4_0_3_dec_ring_emit_ib()
 * emit 22 dwords each.
 */
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_JPEG,
	.align_mask = 0xf,
	.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
	.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
	.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
		22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
		8 + 16,
	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_jpeg_ring_begin_use,
	.end_use = amdgpu_jpeg_ring_end_use,
	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
882 
883 static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
884 {
885 	int i, j;
886 
887 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
888 		if (adev->jpeg.harvest_config & (1 << i))
889 			continue;
890 		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
891 			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
892 			adev->jpeg.inst[i].ring_dec[j].me = i;
893 			adev->jpeg.inst[i].ring_dec[j].pipe = j;
894 		}
895 		adev->jpeg.inst[i].aid_id = i / adev->jpeg.num_inst_per_aid;
896 	}
897 	DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
898 }
899 
/* Callbacks of the JPEG interrupt source installed by jpeg_v4_0_3_set_irq_funcs(). */
static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
	.set = jpeg_v4_0_3_set_interrupt_state,
	.process = jpeg_v4_0_3_process_interrupt,
};
904 
905 static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
906 {
907 	int i;
908 
909 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
910 		if (adev->jpeg.harvest_config & (1 << i))
911 			continue;
912 		adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
913 	}
914 	adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
915 }
916 
/* IP block descriptor for JPEG 4.0.3; the only non-static symbol of this file. */
const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_JPEG,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &jpeg_v4_0_3_ip_funcs,
};
924