/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary
		 * on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

/**
 * vce_v4_0_firmware_loaded - poll for firmware boot completion
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports that the firmware is loaded,
 * resetting the ECPU between retries.  Returns 0 on success or
 * -ETIMEDOUT if the firmware never comes up.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling
		direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ?
			offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

/**
 * vce_v4_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating and LMI defaults, then program the location and
 * size of the firmware, stack and data regions in the VCPU cache.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ?
			0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct
			amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs =
			&vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
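
/*
 * Usage note (illustrative sketch, not part of this file's build): the
 * exported vce_v4_0_ip_block above is expected to be registered by the
 * SoC setup code (e.g. soc15.c for Vega10-class parts), roughly as:
 *
 *	amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
 *
 * after which the amd_ip_funcs callbacks in this file are driven by the
 * common IP block state machine (sw_init/hw_init/suspend/resume/...).
 */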