/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

enum jpeg_engine_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(void *handle,
				enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);

static int amdgpu_ih_srcid_jpeg[] = {
	VCN_4_0__SRCID__JPEG_DECODE,
	VCN_4_0__SRCID__JPEG1_DECODE,
	VCN_4_0__SRCID__JPEG2_DECODE,
	VCN_4_0__SRCID__JPEG3_DECODE,
	VCN_4_0__SRCID__JPEG4_DECODE,
	VCN_4_0__SRCID__JPEG5_DECODE,
	VCN_4_0__SRCID__JPEG6_DECODE,
	VCN_4_0__SRCID__JPEG7_DECODE
};

/**
 * jpeg_v4_0_3_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int jpeg_v4_0_3_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;

	jpeg_v4_0_3_set_dec_ring_funcs(adev);
	jpeg_v4_0_3_set_irq_funcs(adev);
	jpeg_v4_0_3_set_ras_funcs(adev);

	return 0;
}

/**
 * jpeg_v4_0_3_sw_init - sw init for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int jpeg_v4_0_3_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
		/* JPEG TRAP */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
				amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
		if (r)
			return r;
	}

	r = amdgpu_jpeg_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			ring->use_doorbell = true;
			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
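			/*
			 * Doorbell layout, as inferred from the index
			 * arithmetic below: on bare metal each instance gets
			 * nine consecutive slots (one VCN ring followed by the
			 * eight JPEG rings); under SR-IOV each instance owns a
			 * 32-slot window with JPEG rings 0-3 at +4..+7 and
			 * rings 4-7 at +12..+15.
			 */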
			if (!amdgpu_sriov_vf(adev)) {
				ring->doorbell_index =
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					1 + j + 9 * jpeg_inst;
			} else {
				if (j < 4)
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						4 + j + 32 * jpeg_inst;
				else
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						8 + j + 32 * jpeg_inst;
			}
			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
						AMDGPU_RING_PRIO_DEFAULT, NULL);
			if (r)
				return r;

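			/*
			 * The JRBC scratch register doubles as the decode
			 * pitch mailbox. Judging by the offset math used
			 * throughout this file, the JRBC1-7 register sets are
			 * not contiguous with JRBC0: each ring adds a 0x40
			 * dword stride, rebased by -0xc80 relative to the
			 * JRBC0 offsets.
			 */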
			adev->jpeg.internal.jpeg_pitch[j] =
				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
			adev->jpeg.inst[i].external.jpeg_pitch[j] =
				SOC15_REG_OFFSET1(
					JPEG, jpeg_inst,
					regUVD_JRBC0_UVD_JRBC_SCRATCH0,
					(j ? (0x40 * j - 0xc80) : 0));
		}
	}

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		r = amdgpu_jpeg_ras_sw_init(adev);
		if (r) {
			dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
			return r;
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * JPEG suspend and free up sw allocation
 */
static int jpeg_v4_0_3_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_jpeg_suspend(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_sw_fini(adev);

	return r;
}

static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw, item_offset;
	uint32_t init_status;
	int i, j, jpeg_inst;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_3_init_header header;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
		jpeg_inst = GET_INST(JPEG, i);

		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
		header.version = MMSCH_VERSION;
		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

		table_loc = (uint32_t *)table->cpu_addr;
		table_loc += header.total_size;

		item_offset = header.total_size;

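		/*
		 * Build the per-ring init table in the shared mm_table: a
		 * sequence of direct register writes for the MMSCH to apply
		 * on the VF's behalf, programming each ring buffer's base
		 * address and size.
		 */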
		for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			table_size = 0;

			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);

			if (j <= 3) {
				header.mjpegdec0[j].table_offset = item_offset;
				header.mjpegdec0[j].init_status = 0;
				header.mjpegdec0[j].table_size = table_size;
			} else {
				header.mjpegdec1[j - 4].table_offset = item_offset;
				header.mjpegdec1[j - 4].init_status = 0;
				header.mjpegdec1[j - 4].table_size = table_size;
			}
			header.total_size += table_size;
			item_offset += table_size;
		}

		MMSCH_V4_0_INSERT_END();

		/* send init table to MMSCH */
		size = sizeof(struct mmsch_v4_0_3_init_header);
		table_loc = (uint32_t *)table->cpu_addr;
		memcpy((void *)table_loc, &header, size);

		ctx_addr = table->gpu_addr;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

		tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);

		size = header.total_size;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);

		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);

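		/*
		 * Mailbox handshake: the response register was cleared above;
		 * write 1 to the host mailbox to kick off table processing,
		 * then poll the response until the MMSCH reports OK or the
		 * 1000 usec budget is exhausted.
		 */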
		param = 0x00000001;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
		tmp = 0;
		timeout = 1000;
		resp = 0;
		expected = MMSCH_VF_MAILBOX_RESP__OK;
		init_status =
			((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
		while (resp != expected) {
			resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);

			if (resp != 0)
				break;
			udelay(10);
			tmp = tmp + 10;
			if (tmp >= timeout) {
				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec waiting for regMMSCH_VF_MAILBOX_RESP (expected=0x%08x, readback=0x%08x)\n",
					tmp, expected, resp);
				return -EBUSY;
			}
		}
		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
				init_status != MMSCH_VF_ENGINE_STATUS__PASS)
			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
					resp, init_status);

	}
	return 0;
}

/**
 * jpeg_v4_0_3_hw_init - start and test JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Start the JPEG block and run a ring test on each decode ring
 */
static int jpeg_v4_0_3_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	if (amdgpu_sriov_vf(adev)) {
		r = jpeg_v4_0_3_start_sriov(adev);
		if (r)
			return r;

		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				ring->wptr = 0;
				ring->wptr_old = 0;
				jpeg_v4_0_3_dec_ring_set_wptr(ring);
				ring->sched.ready = true;
			}
		}
	} else {
		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			jpeg_inst = GET_INST(JPEG, i);

			ring = adev->jpeg.inst[i].ring_dec;

			if (ring->use_doorbell)
				adev->nbio.funcs->vcn_doorbell_range(
					adev, ring->use_doorbell,
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						9 * jpeg_inst,
					adev->jpeg.inst[i].aid_id);

			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				if (ring->use_doorbell)
					WREG32_SOC15_OFFSET(
						VCN, GET_INST(VCN, i),
						regVCN_JPEG_DB_CTRL,
						(ring->pipe ? (ring->pipe - 0x15) : 0),
						ring->doorbell_index
							<< VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
							VCN_JPEG_DB_CTRL__EN_MASK);
				r = amdgpu_ring_test_helper(ring);
				if (r)
					return r;
			}
		}
	}
	DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");

	return 0;
}

/**
 * jpeg_v4_0_3_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the JPEG block, mark ring as not ready any more
 */
static int jpeg_v4_0_3_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	cancel_delayed_work_sync(&adev->jpeg.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
			ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
	}

	return ret;
}

/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend JPEG block
 */
static int jpeg_v4_0_3_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = jpeg_v4_0_3_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_suspend(adev);

	return r;
}

/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init JPEG block
 */
static int jpeg_v4_0_3_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	r = jpeg_v4_0_3_hw_init(adev);

	return r;
}

static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

/**
 * jpeg_v4_0_3_start - start JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the JPEG block
 */
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);

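		/* power up the JPEG tile and wait for the PGFSM to ack */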
		WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
			     1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(
			JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_ON
				<< UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);

		/* disable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
					  regUVD_JPEG_POWER_STATUS),
			 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		/* JPEG disable CGC */
		jpeg_v4_0_3_disable_clock_gating(adev, i);

		/* MJPEG global tiling registers */
		WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);
		WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);

		/* enable JMI channel */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

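		/*
		 * Per-ring bring-up: route the JRBC interrupt, then program
		 * the ring buffer VMID, base address and size before
		 * releasing the ring for fetching.
		 */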
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			ring = &adev->jpeg.inst[i].ring_dec[j];

			/* enable System Interrupt for JRBC */
			WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
						  regJPEG_SYS_INT_EN),
				 JPEG_SYS_INT_EN__DJRBC0_MASK << j,
				 ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));

			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
					    reg_offset, 0);
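			/*
			 * The raw CNTL values appear to correspond to
			 * UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK (0x1) and
			 * UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK (0x2): stall
			 * fetching while the pointers are reset, then clear
			 * NO_FETCH below to let the ring run.
			 */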
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
					    reg_offset,
					    (0x00000001L | 0x00000002L));
			WREG32_SOC15_OFFSET(
				JPEG, jpeg_inst,
				regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
				reg_offset, lower_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(
				JPEG, jpeg_inst,
				regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
				reg_offset, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_RPTR,
					    reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_WPTR,
					    reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
					    reg_offset, 0x00000002L);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_SIZE,
					    reg_offset, ring->ring_size / 4);
			ring->wptr = RREG32_SOC15_OFFSET(
				JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				reg_offset);
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_stop - stop JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * stop the JPEG block
 */
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
	int i, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);
		/* reset JMI */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
			 UVD_JMI_CNTL__SOFT_RESET_MASK,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

		jpeg_v4_0_3_enable_clock_gating(adev, i);

		/* enable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
					  regUVD_JPEG_POWER_STATUS),
			 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
			 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
			     2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(
			JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_OFF
				<< UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
	}

	return 0;
}

/**
 * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15_OFFSET(
		JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
		ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
}

/**
 * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];
	else
		return RREG32_SOC15_OFFSET(
			JPEG, GET_INST(JPEG, ring->me),
			regUVD_JRBC0_UVD_JRBC_RB_WPTR,
			ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
}

/**
 * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
				    regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				    (ring->pipe ? (0x40 * ring->pipe - 0xc80) :
						  0),
				    lower_32_bits(ring->wptr));
	}
}

/**
 * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write a start command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x80004000);
}

/**
 * jpeg_v4_0_3_dec_ring_insert_end - insert an end command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write an end command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x62a04);

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x00004000);
}

/**
 * jpeg_v4_0_3_dec_ring_emit_fence - emit a fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Write a fence and a trap command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned int flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

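	/*
	 * Fence sequence: the 32-bit seq (hence the WARN_ON above) is loaded
	 * into GPCOM_DATA0/DATA1 and the fence address into the JRBC
	 * memory-write BAR; the GPCOM_CMD write (0x8) then kicks off the
	 * fence write, and the function ends with a trap packet.
	 */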
	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
	amdgpu_ring_write(ring, 0);

	if (ring->adev->jpeg.inst[ring->me].aid_id) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x4);
	} else {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x3fbc);

	if (ring->adev->jpeg.inst[ring->me].aid_id) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x0);
	} else {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x1);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
	amdgpu_ring_write(ring, 0);
}

/**
 * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer.
 */
static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

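	/*
	 * vmid is replicated into three 4-bit fields of the VMID registers.
	 * When a CS parser is attached, the IB VMID is written as 0,
	 * presumably so the validated IB is fetched through VMID 0.
	 */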
	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));

	if (ring->funcs->parse_cs)
		amdgpu_ring_write(ring, 0);
	else
		amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, ib->length_dw);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
	amdgpu_ring_write(ring, 0x2);
}

static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				uint32_t val, uint32_t mask)
{
	uint32_t reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, val);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
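	/*
	 * Registers whose byte offset falls in the 0x10000-0x105ff window
	 * can be addressed directly in the packet header; anything else is
	 * reached indirectly through the external-register write address.
	 */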
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE3));
	}
	amdgpu_ring_write(ring, mask);
}

static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
				unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t data0, data1, mask;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for register write */
	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}

static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	uint32_t reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
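	/* same direct vs. indirect register-window split as emit_reg_wait */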
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE0));
	}
	amdgpu_ring_write(ring, val);
}

static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	WARN_ON(ring->wptr % 2 || count % 2);

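	/* a NOP is a two-dword TYPE6 packet, hence count / 2 pairs */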
	for (i = 0; i < count / 2; i++) {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}
}

static bool jpeg_v4_0_3_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool ret = true;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			ret &= ((RREG32_SOC15_OFFSET(
					 JPEG, GET_INST(JPEG, i),
					 regUVD_JRBC0_UVD_JRBC_STATUS,
					 reg_offset) &
				 UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
		}
	}

	return ret;
}

static int jpeg_v4_0_3_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			ret = SOC15_WAIT_ON_RREG_OFFSET(
				JPEG, GET_INST(JPEG, i),
				regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
			if (ret)
				return ret;
		}
	}
	return ret;
}

static int jpeg_v4_0_3_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = state == AMD_CG_STATE_GATE;
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (enable) {
			if (!jpeg_v4_0_3_is_idle(handle))
				return -EBUSY;
			jpeg_v4_0_3_enable_clock_gating(adev, i);
		} else {
			jpeg_v4_0_3_disable_clock_gating(adev, i);
		}
	}
	return 0;
}

static int jpeg_v4_0_3_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	if (state == adev->jpeg.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = jpeg_v4_0_3_stop(adev);
	else
		ret = jpeg_v4_0_3_start(adev);

	if (!ret)
		adev->jpeg.cur_state = state;

	return ret;
}

static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}

static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t i, inst;

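	/*
	 * The IV carries the physical node id; map it back to the AID and
	 * scan the logical JPEG instances for the one living on that AID.
	 */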
	i = node_id_to_phys_map[entry->node_id];
	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");

	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
		if (adev->jpeg.inst[inst].aid_id == i)
			break;

	if (inst >= adev->jpeg.num_jpeg_inst) {
		dev_WARN_ONCE(adev->dev, 1,
			      "Interrupt received for unknown JPEG instance %d",
			      entry->node_id);
		return 0;
	}

	switch (entry->src_id) {
	case VCN_4_0__SRCID__JPEG_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
		break;
	case VCN_4_0__SRCID__JPEG1_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
		break;
	case VCN_4_0__SRCID__JPEG2_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
		break;
	case VCN_4_0__SRCID__JPEG3_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
		break;
	case VCN_4_0__SRCID__JPEG4_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
		break;
	case VCN_4_0__SRCID__JPEG5_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
		break;
	case VCN_4_0__SRCID__JPEG6_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
		break;
	case VCN_4_0__SRCID__JPEG7_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
		break;
	default:
		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
	.name = "jpeg_v4_0_3",
	.early_init = jpeg_v4_0_3_early_init,
	.late_init = NULL,
	.sw_init = jpeg_v4_0_3_sw_init,
	.sw_fini = jpeg_v4_0_3_sw_fini,
	.hw_init = jpeg_v4_0_3_hw_init,
	.hw_fini = jpeg_v4_0_3_hw_fini,
	.suspend = jpeg_v4_0_3_suspend,
	.resume = jpeg_v4_0_3_resume,
	.is_idle = jpeg_v4_0_3_is_idle,
	.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
	.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
};

static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_JPEG,
	.align_mask = 0xf,
	.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
	.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
	.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
	.parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
		22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
		8 + 16,
	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_jpeg_ring_begin_use,
	.end_use = amdgpu_jpeg_ring_end_use,
	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i, j, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
			adev->jpeg.inst[i].ring_dec[j].me = i;
			adev->jpeg.inst[i].ring_dec[j].pipe = j;
		}
		jpeg_inst = GET_INST(JPEG, i);
		adev->jpeg.inst[i].aid_id =
			jpeg_inst / adev->jpeg.num_inst_per_aid;
	}
	DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
	.set = jpeg_v4_0_3_set_interrupt_state,
	.process = jpeg_v4_0_3_process_interrupt,
};

static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
		adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
	adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
}

const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_JPEG,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &jpeg_v4_0_3_ip_funcs,
};

static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
};

static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst,
						   void *ras_err_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;

	/* jpeg v4_0_3 only supports uncorrectable errors */
	amdgpu_ras_inst_query_ras_error_count(adev,
			jpeg_v4_0_3_ue_reg_list,
			ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
			NULL, 0, GET_INST(VCN, jpeg_inst),
			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
			&err_data->ue_count);
}

static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
					      void *ras_err_status)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
}

static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst)
{
	amdgpu_ras_inst_reset_ras_error_count(adev,
			jpeg_v4_0_3_ue_reg_list,
			ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
			GET_INST(VCN, jpeg_inst));
}

static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
}

static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
	.query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
	.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
};

static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
	.ras_block = {
		.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
	},
};

static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
	adev->jpeg.ras = &jpeg_v4_0_3_ras;
}

/**
 * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
 *
 * @parser: Command submission parser context
 * @job: the job to parse
 * @ib: the IB to parse
 *
 * Parse the command stream; return -EINVAL for an invalid packet,
 * 0 otherwise
 */
int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
			     struct amdgpu_job *job,
			     struct amdgpu_ib *ib)
{
	uint32_t i, reg, res, cond, type;
	struct amdgpu_device *adev = parser->adev;

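	/*
	 * Walk the IB two dwords at a time (packet header + payload) and
	 * reject anything that is not a register access within the allowed
	 * JPEG register window.
	 */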
	for (i = 0; i < ib->length_dw; i += 2) {
		reg  = CP_PACKETJ_GET_REG(ib->ptr[i]);
		res  = CP_PACKETJ_GET_RES(ib->ptr[i]);
		cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
		type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);

		if (res) /* only support 0 at the moment */
			return -EINVAL;

		switch (type) {
		case PACKETJ_TYPE0:
			if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
				dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
				return -EINVAL;
			}
			break;
		case PACKETJ_TYPE3:
			if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
				dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
				return -EINVAL;
			}
			break;
		case PACKETJ_TYPE6:
			if (ib->ptr[i] == CP_PACKETJ_NOP)
				continue;
			dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
			return -EINVAL;
		default:
			dev_err(adev->dev, "Unknown packet type %d !\n", type);
			return -EINVAL;
		}
	}

	return 0;
}