1 /*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include "amdgpu.h"
25 #include "amdgpu_jpeg.h"
26 #include "amdgpu_cs.h"
27 #include "soc15.h"
28 #include "soc15d.h"
29 #include "vcn_v1_0.h"
30 #include "jpeg_v1_0.h"
31
32 #include "vcn/vcn_1_0_offset.h"
33 #include "vcn/vcn_1_0_sh_mask.h"
34
/* Prototypes for file-local functions that are referenced before they are defined. */
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
				     struct amdgpu_job *job, struct amdgpu_ib *ib);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
41
jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring * ring,uint32_t * ptr,uint32_t reg_offset,uint32_t val)42 static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
43 {
44 struct amdgpu_device *adev = ring->adev;
45 ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
46 if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
47 ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
48 ring->ring[(*ptr)++] = 0;
49 ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
50 } else {
51 ring->ring[(*ptr)++] = reg_offset;
52 ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
53 }
54 ring->ring[(*ptr)++] = val;
55 }
56
jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring * ring,uint32_t ptr)57 static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
58 {
59 struct amdgpu_device *adev = ring->adev;
60
61 uint32_t reg, reg_offset, val, mask, i;
62
63 // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
64 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
65 reg_offset = (reg << 2);
66 val = lower_32_bits(ring->gpu_addr);
67 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
68
69 // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
70 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
71 reg_offset = (reg << 2);
72 val = upper_32_bits(ring->gpu_addr);
73 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
74
75 // 3rd to 5th: issue MEM_READ commands
76 for (i = 0; i <= 2; i++) {
77 ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
78 ring->ring[ptr++] = 0;
79 }
80
81 // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
82 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
83 reg_offset = (reg << 2);
84 val = 0x13;
85 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
86
87 // 7th: program mmUVD_JRBC_RB_REF_DATA
88 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
89 reg_offset = (reg << 2);
90 val = 0x1;
91 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
92
93 // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
94 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
95 reg_offset = (reg << 2);
96 val = 0x1;
97 mask = 0x1;
98
99 ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
100 ring->ring[ptr++] = 0x01400200;
101 ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
102 ring->ring[ptr++] = val;
103 ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
104 if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
105 ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
106 ring->ring[ptr++] = 0;
107 ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
108 } else {
109 ring->ring[ptr++] = reg_offset;
110 ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
111 }
112 ring->ring[ptr++] = mask;
113
114 //9th to 21st: insert no-op
115 for (i = 0; i <= 12; i++) {
116 ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
117 ring->ring[ptr++] = 0;
118 }
119
120 //22nd: reset mmUVD_JRBC_RB_RPTR
121 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
122 reg_offset = (reg << 2);
123 val = 0;
124 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
125
126 //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
127 reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
128 reg_offset = (reg << 2);
129 val = 0x12;
130 jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
131 }
132
133 /**
134 * jpeg_v1_0_decode_ring_get_rptr - get read pointer
135 *
136 * @ring: amdgpu_ring pointer
137 *
138 * Returns the current hardware read pointer
139 */
jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring * ring)140 static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
141 {
142 struct amdgpu_device *adev = ring->adev;
143
144 return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
145 }
146
147 /**
148 * jpeg_v1_0_decode_ring_get_wptr - get write pointer
149 *
150 * @ring: amdgpu_ring pointer
151 *
152 * Returns the current hardware write pointer
153 */
jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring * ring)154 static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
155 {
156 struct amdgpu_device *adev = ring->adev;
157
158 return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
159 }
160
161 /**
162 * jpeg_v1_0_decode_ring_set_wptr - set write pointer
163 *
164 * @ring: amdgpu_ring pointer
165 *
166 * Commits the write pointer to the hardware
167 */
jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring * ring)168 static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
169 {
170 struct amdgpu_device *adev = ring->adev;
171
172 WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
173 }
174
175 /**
176 * jpeg_v1_0_decode_ring_insert_start - insert a start command
177 *
178 * @ring: amdgpu_ring pointer
179 *
180 * Write a start command to the ring.
181 */
jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring * ring)182 static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
183 {
184 struct amdgpu_device *adev = ring->adev;
185
186 amdgpu_ring_write(ring,
187 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
188 amdgpu_ring_write(ring, 0x68e04);
189
190 amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
191 amdgpu_ring_write(ring, 0x80010000);
192 }
193
194 /**
195 * jpeg_v1_0_decode_ring_insert_end - insert a end command
196 *
197 * @ring: amdgpu_ring pointer
198 *
199 * Write a end command to the ring.
200 */
jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring * ring)201 static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
202 {
203 struct amdgpu_device *adev = ring->adev;
204
205 amdgpu_ring_write(ring,
206 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
207 amdgpu_ring_write(ring, 0x68e04);
208
209 amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
210 amdgpu_ring_write(ring, 0x00010000);
211 }
212
213 /**
214 * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
215 *
216 * @ring: amdgpu_ring pointer
217 * @addr: address
218 * @seq: sequence number
219 * @flags: fence related flags
220 *
221 * Write a fence and a trap command to the ring.
222 */
jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring * ring,u64 addr,u64 seq,unsigned flags)223 static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
224 unsigned flags)
225 {
226 struct amdgpu_device *adev = ring->adev;
227
228 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
229
230 amdgpu_ring_write(ring,
231 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
232 amdgpu_ring_write(ring, seq);
233
234 amdgpu_ring_write(ring,
235 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
236 amdgpu_ring_write(ring, seq);
237
238 amdgpu_ring_write(ring,
239 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
240 amdgpu_ring_write(ring, lower_32_bits(addr));
241
242 amdgpu_ring_write(ring,
243 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
244 amdgpu_ring_write(ring, upper_32_bits(addr));
245
246 amdgpu_ring_write(ring,
247 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
248 amdgpu_ring_write(ring, 0x8);
249
250 amdgpu_ring_write(ring,
251 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
252 amdgpu_ring_write(ring, 0);
253
254 amdgpu_ring_write(ring,
255 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
256 amdgpu_ring_write(ring, 0x01400200);
257
258 amdgpu_ring_write(ring,
259 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
260 amdgpu_ring_write(ring, seq);
261
262 amdgpu_ring_write(ring,
263 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
264 amdgpu_ring_write(ring, lower_32_bits(addr));
265
266 amdgpu_ring_write(ring,
267 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
268 amdgpu_ring_write(ring, upper_32_bits(addr));
269
270 amdgpu_ring_write(ring,
271 PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
272 amdgpu_ring_write(ring, 0xffffffff);
273
274 amdgpu_ring_write(ring,
275 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
276 amdgpu_ring_write(ring, 0x3fbc);
277
278 amdgpu_ring_write(ring,
279 PACKETJ(0, 0, 0, PACKETJ_TYPE0));
280 amdgpu_ring_write(ring, 0x1);
281
282 /* emit trap */
283 amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
284 amdgpu_ring_write(ring, 0);
285 }
286
287 /**
288 * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
289 *
290 * @ring: amdgpu_ring pointer
291 * @job: job to retrieve vmid from
292 * @ib: indirect buffer to execute
293 * @flags: unused
294 *
295 * Write ring commands to execute the indirect buffer.
296 */
jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring * ring,struct amdgpu_job * job,struct amdgpu_ib * ib,uint32_t flags)297 static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
298 struct amdgpu_job *job,
299 struct amdgpu_ib *ib,
300 uint32_t flags)
301 {
302 struct amdgpu_device *adev = ring->adev;
303 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
304
305 amdgpu_ring_write(ring,
306 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
307 if (ring->funcs->parse_cs)
308 amdgpu_ring_write(ring, 0);
309 else
310 amdgpu_ring_write(ring, (vmid | (vmid << 4)));
311
312 amdgpu_ring_write(ring,
313 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
314 amdgpu_ring_write(ring, (vmid | (vmid << 4)));
315
316 amdgpu_ring_write(ring,
317 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
318 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
319
320 amdgpu_ring_write(ring,
321 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
322 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
323
324 amdgpu_ring_write(ring,
325 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
326 amdgpu_ring_write(ring, ib->length_dw);
327
328 amdgpu_ring_write(ring,
329 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
330 amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
331
332 amdgpu_ring_write(ring,
333 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
334 amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
335
336 amdgpu_ring_write(ring,
337 PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
338 amdgpu_ring_write(ring, 0);
339
340 amdgpu_ring_write(ring,
341 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
342 amdgpu_ring_write(ring, 0x01400200);
343
344 amdgpu_ring_write(ring,
345 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
346 amdgpu_ring_write(ring, 0x2);
347
348 amdgpu_ring_write(ring,
349 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
350 amdgpu_ring_write(ring, 0x2);
351 }
352
jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring * ring,uint32_t reg,uint32_t val,uint32_t mask)353 static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
354 uint32_t reg, uint32_t val,
355 uint32_t mask)
356 {
357 struct amdgpu_device *adev = ring->adev;
358 uint32_t reg_offset = (reg << 2);
359
360 amdgpu_ring_write(ring,
361 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
362 amdgpu_ring_write(ring, 0x01400200);
363
364 amdgpu_ring_write(ring,
365 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
366 amdgpu_ring_write(ring, val);
367
368 amdgpu_ring_write(ring,
369 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
370 if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
371 ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
372 amdgpu_ring_write(ring, 0);
373 amdgpu_ring_write(ring,
374 PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
375 } else {
376 amdgpu_ring_write(ring, reg_offset);
377 amdgpu_ring_write(ring,
378 PACKETJ(0, 0, 0, PACKETJ_TYPE3));
379 }
380 amdgpu_ring_write(ring, mask);
381 }
382
jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring * ring,unsigned vmid,uint64_t pd_addr)383 static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
384 unsigned vmid, uint64_t pd_addr)
385 {
386 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
387 uint32_t data0, data1, mask;
388
389 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
390
391 /* wait for register write */
392 data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
393 data1 = lower_32_bits(pd_addr);
394 mask = 0xffffffff;
395 jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
396 }
397
jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring * ring,uint32_t reg,uint32_t val)398 static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
399 uint32_t reg, uint32_t val)
400 {
401 struct amdgpu_device *adev = ring->adev;
402 uint32_t reg_offset = (reg << 2);
403
404 amdgpu_ring_write(ring,
405 PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
406 if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
407 ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
408 amdgpu_ring_write(ring, 0);
409 amdgpu_ring_write(ring,
410 PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
411 } else {
412 amdgpu_ring_write(ring, reg_offset);
413 amdgpu_ring_write(ring,
414 PACKETJ(0, 0, 0, PACKETJ_TYPE0));
415 }
416 amdgpu_ring_write(ring, val);
417 }
418
jpeg_v1_0_decode_ring_nop(struct amdgpu_ring * ring,uint32_t count)419 static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
420 {
421 int i;
422
423 WARN_ON(ring->wptr % 2 || count % 2);
424
425 for (i = 0; i < count / 2; i++) {
426 amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
427 amdgpu_ring_write(ring, 0);
428 }
429 }
430
/*
 * jpeg_v1_0_set_interrupt_state - interrupt state callback
 *
 * @adev: amdgpu_device pointer (unused)
 * @source: interrupt source (unused)
 * @type: interrupt type (unused)
 * @state: requested interrupt state (unused)
 *
 * Does nothing and always returns 0.
 */
static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned type,
					 enum amdgpu_interrupt_state state)
{
	/* nothing to program */
	return 0;
}
438
jpeg_v1_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)439 static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
440 struct amdgpu_irq_src *source,
441 struct amdgpu_iv_entry *entry)
442 {
443 DRM_DEBUG("IH: JPEG decode TRAP\n");
444
445 switch (entry->src_id) {
446 case 126:
447 amdgpu_fence_process(adev->jpeg.inst->ring_dec);
448 break;
449 default:
450 DRM_ERROR("Unhandled interrupt: %d %d\n",
451 entry->src_id, entry->src_data[0]);
452 break;
453 }
454
455 return 0;
456 }
457
458 /**
459 * jpeg_v1_0_early_init - set function pointers
460 *
461 * @handle: amdgpu_device pointer
462 *
463 * Set ring and irq function pointers
464 */
jpeg_v1_0_early_init(void * handle)465 int jpeg_v1_0_early_init(void *handle)
466 {
467 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
468
469 adev->jpeg.num_jpeg_inst = 1;
470 adev->jpeg.num_jpeg_rings = 1;
471
472 jpeg_v1_0_set_dec_ring_funcs(adev);
473 jpeg_v1_0_set_irq_funcs(adev);
474
475 return 0;
476 }
477
478 /**
479 * jpeg_v1_0_sw_init - sw init for JPEG block
480 *
481 * @handle: amdgpu_device pointer
482 *
483 */
jpeg_v1_0_sw_init(void * handle)484 int jpeg_v1_0_sw_init(void *handle)
485 {
486 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
487 struct amdgpu_ring *ring;
488 int r;
489
490 /* JPEG TRAP */
491 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
492 if (r)
493 return r;
494
495 ring = adev->jpeg.inst->ring_dec;
496 ring->vm_hub = AMDGPU_MMHUB0(0);
497 sprintf(ring->name, "jpeg_dec");
498 r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
499 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
500 if (r)
501 return r;
502
503 adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
504 SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
505
506 return 0;
507 }
508
509 /**
510 * jpeg_v1_0_sw_fini - sw fini for JPEG block
511 *
512 * @handle: amdgpu_device pointer
513 *
514 * JPEG free up sw allocation
515 */
jpeg_v1_0_sw_fini(void * handle)516 void jpeg_v1_0_sw_fini(void *handle)
517 {
518 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
519
520 amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
521 }
522
523 /**
524 * jpeg_v1_0_start - start JPEG block
525 *
526 * @adev: amdgpu_device pointer
527 * @mode: SPG or DPG mode
528 *
529 * Setup and start the JPEG block
530 */
jpeg_v1_0_start(struct amdgpu_device * adev,int mode)531 void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
532 {
533 struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
534
535 if (mode == 0) {
536 WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
537 WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
538 UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
539 WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
540 WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
541 WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
542 WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
543 WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
544 }
545
546 /* initialize wptr */
547 ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
548
549 /* copy patch commands to the jpeg ring */
550 jpeg_v1_0_decode_ring_set_patch_ring(ring,
551 (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
552 }
553
554 static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
555 .type = AMDGPU_RING_TYPE_VCN_JPEG,
556 .align_mask = 0xf,
557 .nop = PACKET0(0x81ff, 0),
558 .support_64bit_ptrs = false,
559 .no_user_fence = true,
560 .extra_dw = 64,
561 .get_rptr = jpeg_v1_0_decode_ring_get_rptr,
562 .get_wptr = jpeg_v1_0_decode_ring_get_wptr,
563 .set_wptr = jpeg_v1_0_decode_ring_set_wptr,
564 .parse_cs = jpeg_v1_dec_ring_parse_cs,
565 .emit_frame_size =
566 6 + 6 + /* hdp invalidate / flush */
567 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
568 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
569 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
570 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
571 6,
572 .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
573 .emit_ib = jpeg_v1_0_decode_ring_emit_ib,
574 .emit_fence = jpeg_v1_0_decode_ring_emit_fence,
575 .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
576 .test_ring = amdgpu_jpeg_dec_ring_test_ring,
577 .test_ib = amdgpu_jpeg_dec_ring_test_ib,
578 .insert_nop = jpeg_v1_0_decode_ring_nop,
579 .insert_start = jpeg_v1_0_decode_ring_insert_start,
580 .insert_end = jpeg_v1_0_decode_ring_insert_end,
581 .pad_ib = amdgpu_ring_generic_pad_ib,
582 .begin_use = jpeg_v1_0_ring_begin_use,
583 .end_use = vcn_v1_0_ring_end_use,
584 .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
585 .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
586 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
587 };
588
jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device * adev)589 static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
590 {
591 adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
592 DRM_INFO("JPEG decode is enabled in VM mode\n");
593 }
594
595 static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
596 .set = jpeg_v1_0_set_interrupt_state,
597 .process = jpeg_v1_0_process_interrupt,
598 };
599
jpeg_v1_0_set_irq_funcs(struct amdgpu_device * adev)600 static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
601 {
602 adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
603 }
604
jpeg_v1_0_ring_begin_use(struct amdgpu_ring * ring)605 static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
606 {
607 struct amdgpu_device *adev = ring->adev;
608 bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
609 int cnt = 0;
610
611 mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
612
613 if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
614 DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
615
616 for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
617 if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
618 DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
619 }
620
621 vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
622 }
623
624 /**
625 * jpeg_v1_dec_ring_parse_cs - command submission parser
626 *
627 * @parser: Command submission parser context
628 * @job: the job to parse
629 * @ib: the IB to parse
630 *
631 * Parse the command stream, return -EINVAL for invalid packet,
632 * 0 otherwise
633 */
jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser * parser,struct amdgpu_job * job,struct amdgpu_ib * ib)634 static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
635 struct amdgpu_job *job,
636 struct amdgpu_ib *ib)
637 {
638 u32 i, reg, res, cond, type;
639 int ret = 0;
640 struct amdgpu_device *adev = parser->adev;
641
642 for (i = 0; i < ib->length_dw ; i += 2) {
643 reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
644 res = CP_PACKETJ_GET_RES(ib->ptr[i]);
645 cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
646 type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
647
648 if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
649 return -EINVAL;
650
651 if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
652 continue;
653
654 switch (type) {
655 case PACKETJ_TYPE0:
656 if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
657 reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
658 reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
659 reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
660 reg != JPEG_V1_REG_CTX_INDEX &&
661 reg != JPEG_V1_REG_CTX_DATA) {
662 ret = -EINVAL;
663 }
664 break;
665 case PACKETJ_TYPE1:
666 if (reg != JPEG_V1_REG_CTX_DATA)
667 ret = -EINVAL;
668 break;
669 case PACKETJ_TYPE3:
670 if (reg != JPEG_V1_REG_SOFT_RESET)
671 ret = -EINVAL;
672 break;
673 case PACKETJ_TYPE6:
674 if (ib->ptr[i] != CP_PACKETJ_NOP)
675 ret = -EINVAL;
676 break;
677 default:
678 ret = -EINVAL;
679 }
680
681 if (ret) {
682 dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
683 break;
684 }
685 }
686
687 return ret;
688 }
689