/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
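
/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV through the MMSCH
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH v1.0 init table (ring setup, MC resume sequence and
 * soft reset release) in the shared MM table and hand it off to
 * vce_v4_0_mmsch_start() so the MM scheduler firmware programs the
 * registers on behalf of the virtual function.
 */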
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & ~0x0f000000);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ?
						offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused locations for the other rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].sched.ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
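
/**
 * vce_v4_0_mc_resume - program the VCE memory controller registers
 *
 * @adev: amdgpu_device pointer
 *
 * Tell the VCE LMI where the firmware, stack and data segments live
 * (the PSP TMR location when the firmware is loaded by the PSP,
 * otherwise the VCPU BO) and enable the system interrupt.
 */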
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
				  struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
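
/**
 * vce_v4_0_set_ring_funcs - set up the ring callbacks
 *
 * @adev: amdgpu_device pointer
 *
 * All VCE 4.0 rings share the same VM ring functions; the per-ring
 * "me" index selects which RPTR/WPTR registers each ring uses.
 */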
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};