/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

enum jpeg_engine_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(void *handle,
				enum amd_powergating_state state);

static int amdgpu_ih_srcid_jpeg[] = {
	VCN_4_0__SRCID__JPEG_DECODE,
	VCN_4_0__SRCID__JPEG1_DECODE,
	VCN_4_0__SRCID__JPEG2_DECODE,
	VCN_4_0__SRCID__JPEG3_DECODE,
	VCN_4_0__SRCID__JPEG4_DECODE,
	VCN_4_0__SRCID__JPEG5_DECODE,
	VCN_4_0__SRCID__JPEG6_DECODE,
	VCN_4_0__SRCID__JPEG7_DECODE
};

/**
 * jpeg_v4_0_3_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int jpeg_v4_0_3_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;

	jpeg_v4_0_3_set_dec_ring_funcs(adev);
	jpeg_v4_0_3_set_irq_funcs(adev);

	return 0;
}

/**
 * jpeg_v4_0_3_sw_init - sw init for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int jpeg_v4_0_3_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r;

	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
		/* JPEG TRAP */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
				amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
		if (r)
			return r;
	}

	r = amdgpu_jpeg_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			ring->use_doorbell = true;
			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
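			/*
			 * Doorbell layout, as implied by the arithmetic
			 * below (a reading of the code, not of the doorbell
			 * spec): each instance owns a block of 9 doorbell
			 * slots starting at (vcn_ring0_1 << 1) + 9 * i;
			 * slot 0 is presumably left for the VCN queue and
			 * slots 1..8 serve the eight JPEG decode rings,
			 * hence the "+ 1 + j".
			 */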
			ring->doorbell_index =
				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
				1 + j + 9 * i;
			sprintf(ring->name, "jpeg_dec_%d.%d", i, j);
			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT, NULL);
			if (r)
				return r;

			adev->jpeg.internal.jpeg_pitch[j] =
				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
			adev->jpeg.inst[i].external.jpeg_pitch[j] =
				SOC15_REG_OFFSET1(JPEG, i, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
					(j ? (0x40 * j - 0xc80) : 0));
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * JPEG suspend and free up sw allocation
 */
static int jpeg_v4_0_3_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_jpeg_suspend(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_sw_fini(adev);

	return r;
}

/**
 * jpeg_v4_0_3_hw_init - start and test JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 */
static int jpeg_v4_0_3_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		ring = adev->jpeg.inst[i].ring_dec;

		if (ring->use_doorbell)
			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i,
				adev->jpeg.inst[i].aid_id);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			if (ring->use_doorbell)
				WREG32_SOC15_OFFSET(VCN, i, regVCN_JPEG_DB_CTRL,
					(ring->pipe ? (ring->pipe - 0x15) : 0),
					ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
					VCN_JPEG_DB_CTRL__EN_MASK);
			r = amdgpu_ring_test_helper(ring);
			if (r)
				return r;
		}
	}
	DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");

	return 0;
}

/**
 * jpeg_v4_0_3_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the JPEG block, mark ring as not ready any more
 */
static int jpeg_v4_0_3_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	cancel_delayed_work_sync(&adev->jpeg.idle_work);

	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
		ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);

	return ret;
}

/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend JPEG block
 */
static int jpeg_v4_0_3_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = jpeg_v4_0_3_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_suspend(adev);

	return r;
}

/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init JPEG block
 */
static int jpeg_v4_0_3_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	r = jpeg_v4_0_3_hw_init(adev);

	return r;
}
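/*
 * Clock-gating helpers. A hedged reading of the register usage (the CGC
 * register spec is not public): JPEG_CGC_CTRL selects dynamic vs. static
 * gating per engine plus the gating delay/off timers, while JPEG_CGC_GATE
 * force-gates individual clock domains. _disable un-gates everything so
 * the block can be programmed; _enable re-gates it on the way to power-off.
 */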
static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t data;
	int i;

	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE);
	data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data);
}

static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t data;
	int i;

	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE);
	data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data);
}
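/*
 * Per-ring register aperture: ring 0 uses the UVD_JRBC0_* addresses from
 * the offset header directly, while rings 1..7 sit in a second bank spaced
 * 0x40 registers apart. The "(j ? (0x40 * j - 0xc80) : 0)" expression used
 * throughout converts a ring index into the offset added to the JRBC0
 * register address; the -0xc80 rebases from the JRBC0 block into that bank
 * (inferred from the arithmetic here, not from the register map).
 */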
/**
 * jpeg_v4_0_3_start - start JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the JPEG block
 */
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG,
			     1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS,
				   UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
				   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
				   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);

		/* disable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), 0,
			 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		/* JPEG disable CGC */
		jpeg_v4_0_3_disable_clock_gating(adev, i);

		/* MJPEG global tiling registers */
		WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX8_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);
		WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);

		/* enable JMI channel */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			ring = &adev->jpeg.inst[i].ring_dec[j];

			/* enable System Interrupt for JRBC */
			WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN),
				 JPEG_SYS_INT_EN__DJRBC0_MASK << j,
				 ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));

			WREG32_SOC15_OFFSET(JPEG, i,
				regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset,
				(0x00000001L | 0x00000002L));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
				reg_offset, lower_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
				reg_offset, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset,
				0x00000002L);
			WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset,
				ring->ring_size / 4);
			ring->wptr = RREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				reg_offset);
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_stop - stop JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * stop the JPEG block
 */
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;

		/* reset JMI */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL),
			 UVD_JMI_CNTL__SOFT_RESET_MASK,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

		jpeg_v4_0_3_enable_clock_gating(adev, i);

		/* enable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS),
			 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
			 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG,
			     2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS,
				   UVD_PGFSM_STATUS__UVDJ_PWR_OFF <<
				   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
				   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
	}

	return 0;
}

/**
 * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR,
		ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
}

/**
 * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];
	else
		return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
			ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
}

/**
 * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
			(ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0),
			lower_32_bits(ring->wptr));
	}
}
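/*
 * Ring command emitters. Each command is a PACKETJ() header dword (an
 * 18-bit register offset, a reserved field, a condition code and a packet
 * type, per soc15d.h) followed by one payload dword. As used in this file:
 * TYPE0 writes the payload to the addressed register, TYPE3 polls the
 * register against the reference data, and TYPE6 serves as a two-dword
 * nop/padding slot. The magic payloads (0x62a04, 0x80004000, ...) are
 * carried over from earlier JPEG rings; their meaning is not documented
 * publicly.
 */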
/**
 * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write a start command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x80004000);
}

/**
 * jpeg_v4_0_3_dec_ring_insert_end - insert an end command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write an end command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x62a04);

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x00004000);
}

/**
 * jpeg_v4_0_3_dec_ring_emit_fence - emit a fence and trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Write a fence and a trap command to the ring.
 */
static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned int flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
	amdgpu_ring_write(ring, 0);

	if (ring->adev->jpeg.inst[ring->me].aid_id) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x4);
	} else {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x3fbc);

	if (ring->adev->jpeg.inst[ring->me].aid_id) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
			0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x0);
	} else {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}

	amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x1);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
	amdgpu_ring_write(ring, 0);
}
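/*
 * IB submission sequence, as implemented by the emitter below: program the
 * IB and engine VMIDs, point LMI_JRBC_IB at the buffer and set its size,
 * re-arm the ring's MEM_RD base address, then poll UVD_JRBC_STATUS against
 * RB_REF_DATA with a TYPE3 packet (by the look of it, to hold the ring
 * until the engine has latched the IB).
 */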
/**
 * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer.
 */
static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, (vmid | (vmid << 4)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, (vmid | (vmid << 4)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, ib->length_dw);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
	amdgpu_ring_write(ring, 0x2);
}

static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				uint32_t val, uint32_t mask)
{
	uint32_t reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, val);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE3));
	}
	amdgpu_ring_write(ring, mask);
}

static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
				unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t data0, data1, mask;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for register write */
	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}
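/*
 * For emit_wreg below (and emit_reg_wait above): byte offsets inside
 * 0x10000..0x105ff (reg_offset = reg << 2) can apparently be encoded
 * directly in the PACKETJ header, while anything outside that window is
 * accessed indirectly by first loading the address into
 * UVD_JRBC_EXTERNAL_REG and then issuing the access through
 * JRBC_DEC_EXTERNAL_REG_WRITE_ADDR.
 */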
static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	uint32_t reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE0));
	}
	amdgpu_ring_write(ring, val);
}

static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	WARN_ON(ring->wptr % 2 || count % 2);

	for (i = 0; i < count / 2; i++) {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}
}

static bool jpeg_v4_0_3_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool ret = true;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			/* idle only if every ring reports RB_JOB_DONE */
			ret &= ((RREG32_SOC15_OFFSET(JPEG, i,
				regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) &
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
		}
	}

	return ret;
}

static int jpeg_v4_0_3_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			unsigned int reg_offset = (j ? (0x40 * j - 0xc80) : 0);

			/* fail as soon as any ring's RB_JOB_DONE poll times out */
			r = SOC15_WAIT_ON_RREG_OFFSET(JPEG, i,
				regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
			if (r)
				return r;
		}
	}
	return 0;
}
static int jpeg_v4_0_3_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		if (enable) {
			if (!jpeg_v4_0_3_is_idle(handle))
				return -EBUSY;
			jpeg_v4_0_3_enable_clock_gating(adev, i);
		} else {
			jpeg_v4_0_3_disable_clock_gating(adev, i);
		}
	}
	return 0;
}

static int jpeg_v4_0_3_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	if (state == adev->jpeg.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = jpeg_v4_0_3_stop(adev);
	else
		ret = jpeg_v4_0_3_start(adev);

	if (!ret)
		adev->jpeg.cur_state = state;

	return ret;
}

static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}

static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t i;

	i = node_id_to_phys_map[entry->node_id];
	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");

	switch (entry->src_id) {
	case VCN_4_0__SRCID__JPEG_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[0]);
		break;
	case VCN_4_0__SRCID__JPEG1_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[1]);
		break;
	case VCN_4_0__SRCID__JPEG2_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[2]);
		break;
	case VCN_4_0__SRCID__JPEG3_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[3]);
		break;
	case VCN_4_0__SRCID__JPEG4_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[4]);
		break;
	case VCN_4_0__SRCID__JPEG5_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[5]);
		break;
	case VCN_4_0__SRCID__JPEG6_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[6]);
		break;
	case VCN_4_0__SRCID__JPEG7_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[7]);
		break;
	default:
		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
			      entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
	.name = "jpeg_v4_0_3",
	.early_init = jpeg_v4_0_3_early_init,
	.late_init = NULL,
	.sw_init = jpeg_v4_0_3_sw_init,
	.sw_fini = jpeg_v4_0_3_sw_fini,
	.hw_init = jpeg_v4_0_3_hw_init,
	.hw_fini = jpeg_v4_0_3_hw_fini,
	.suspend = jpeg_v4_0_3_suspend,
	.resume = jpeg_v4_0_3_resume,
	.is_idle = jpeg_v4_0_3_is_idle,
	.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
	.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
};
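/*
 * emit_frame_size accounting (dword counts follow from the emitters above):
 * each fence is 11 packet/payload pairs = 22 dwords, emitted twice for VM
 * fences; emit_reg_wait is 8 dwords, which also covers the wait at the end
 * of emit_vm_flush; the GPU TLB flush is budgeted at 6 dwords per register
 * write and 8 per register wait; the trailing "8 + 16" appears to be
 * headroom for the start/end commands plus padding.
 */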
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_JPEG,
	.align_mask = 0xf,
	.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
	.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
	.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
		22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
		8 + 16,
	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_jpeg_ring_begin_use,
	.end_use = amdgpu_jpeg_ring_end_use,
	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
			adev->jpeg.inst[i].ring_dec[j].me = i;
			adev->jpeg.inst[i].ring_dec[j].pipe = j;
		}
		adev->jpeg.inst[i].aid_id = i / adev->jpeg.num_inst_per_aid;
	}
	DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
	.set = jpeg_v4_0_3_set_interrupt_state,
	.process = jpeg_v4_0_3_process_interrupt,
};

static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (adev->jpeg.harvest_config & (1 << i))
			continue;
		adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
	}
	adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
}

const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_JPEG,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &jpeg_v4_0_3_ip_funcs,
};