1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 27 #include <linux/firmware.h> 28 #include <drm/drmP.h> 29 #include "amdgpu.h" 30 #include "amdgpu_vce.h" 31 #include "soc15.h" 32 #include "soc15d.h" 33 #include "soc15_common.h" 34 #include "mmsch_v1_0.h" 35 36 #include "vce/vce_4_0_offset.h" 37 #include "vce/vce_4_0_default.h" 38 #include "vce/vce_4_0_sh_mask.h" 39 #include "mmhub/mmhub_1_0_offset.h" 40 #include "mmhub/mmhub_1_0_sh_mask.h" 41 42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h" 43 44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 45 46 #define VCE_V4_0_FW_SIZE (384 * 1024) 47 #define VCE_V4_0_STACK_SIZE (64 * 1024) 48 #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024)) 49 50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev); 51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); 52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); 53 54 /** 55 * vce_v4_0_ring_get_rptr - get read pointer 56 * 57 * @ring: amdgpu_ring pointer 58 * 59 * Returns the current hardware read pointer 60 */ 61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring) 62 { 63 struct amdgpu_device *adev = ring->adev; 64 65 if (ring->me == 0) 66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); 67 else if (ring->me == 1) 68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2)); 69 else 70 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); 71 } 72 73 /** 74 * vce_v4_0_ring_get_wptr - get write pointer 75 * 76 * @ring: amdgpu_ring pointer 77 * 78 * Returns the current hardware write pointer 79 */ 80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring) 81 { 82 struct amdgpu_device *adev = ring->adev; 83 84 if (ring->use_doorbell) 85 return adev->wb.wb[ring->wptr_offs]; 86 87 if (ring->me == 0) 88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); 89 else if (ring->me == 1) 90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2)); 91 else 92 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); 93 } 94 95 /** 96 * vce_v4_0_ring_set_wptr - set write pointer 97 * 98 * @ring: amdgpu_ring pointer 99 * 100 * Commits the write pointer to the hardware 101 */ 102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring) 103 { 104 struct amdgpu_device *adev = ring->adev; 105 106 if (ring->use_doorbell) { 107 /* XXX check if swapping is necessary on BE */ 108 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 109 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 110 return; 111 } 112 113 if (ring->me == 0) 114 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), 115 lower_32_bits(ring->wptr)); 116 else if (ring->me == 1) 117 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), 118 lower_32_bits(ring->wptr)); 119 else 120 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), 121 lower_32_bits(ring->wptr)); 122 } 123 124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev) 125 { 126 int i, j; 127 128 for (i = 0; i < 10; ++i) { 129 for (j = 0; j < 100; ++j) { 130 uint32_t status = 131 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)); 132 133 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK) 134 return 0; 135 mdelay(10); 136 } 137 138 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); 139 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 140 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 141 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 142 mdelay(10); 143 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, 144 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 145 mdelay(10); 146 147 } 148 149 return -ETIMEDOUT; 150 } 151 152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, 153 struct amdgpu_mm_table *table) 154 { 155 uint32_t data = 0, loop; 156 uint64_t addr = table->gpu_addr; 157 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; 158 uint32_t size; 159 160 size = header->header_size + header->vce_table_size + header->uvd_table_size; 161 162 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ 163 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); 164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); 165 166 /* 2, update vmid of descriptor */ 167 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); 168 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; 169 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ 170 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); 171 172 /* 3, notify mmsch about the size of this descriptor */ 173 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); 174 175 /* 4, set resp to zero */ 176 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); 177 178 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); 179 adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0; 180 adev->vce.ring[0].wptr = 0; 181 adev->vce.ring[0].wptr_old = 0; 182 183 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ 184 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); 185 186 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 187 loop = 1000; 188 while ((data & 0x10000002) != 0x10000002) { 189 udelay(10); 190 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 191 loop--; 192 if (!loop) 193 break; 194 } 195 196 if (!loop) { 197 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); 198 return -EBUSY; 199 } 200 201 return 0; 202 } 203 204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev) 205 { 206 struct amdgpu_ring *ring; 207 uint32_t offset, size; 208 uint32_t table_size = 0; 209 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; 210 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; 211 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; 212 struct mmsch_v1_0_cmd_end end = { { 0 } }; 213 uint32_t *init_table = adev->virt.mm_table.cpu_addr; 214 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; 215 216 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; 217 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; 218 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; 219 end.cmd_header.command_type = MMSCH_COMMAND__END; 220 221 if (header->vce_table_offset == 0 && header->vce_table_size == 0) { 222 header->version = MMSCH_VERSION; 223 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2; 224 225 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) 226 header->vce_table_offset = header->header_size; 227 else 228 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset; 229 230 init_table += header->vce_table_offset; 231 232 ring = &adev->vce.ring[0]; 233 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), 234 lower_32_bits(ring->gpu_addr)); 235 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), 236 upper_32_bits(ring->gpu_addr)); 237 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), 238 ring->ring_size / 4); 239 240 /* BEGING OF MC_RESUME */ 241 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); 242 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); 243 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); 244 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); 245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); 246 247 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 248 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 249 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 250 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); 251 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 252 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), 253 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); 254 } else { 255 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 256 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 257 adev->vce.gpu_addr >> 8); 258 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 259 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), 260 (adev->vce.gpu_addr >> 40) & 0xff); 261 } 262 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 263 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), 264 adev->vce.gpu_addr >> 8); 265 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 266 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), 267 (adev->vce.gpu_addr >> 40) & 0xff); 268 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 269 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), 270 adev->vce.gpu_addr >> 8); 271 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, 272 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), 273 (adev->vce.gpu_addr >> 40) & 0xff); 274 275 offset = AMDGPU_VCE_FIRMWARE_OFFSET; 276 size = VCE_V4_0_FW_SIZE; 277 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 278 offset & ~0x0f000000); 279 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); 280 281 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; 282 size = VCE_V4_0_STACK_SIZE; 283 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), 284 (offset & ~0x0f000000) | (1 << 24)); 285 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); 286 287 offset += size; 288 size = VCE_V4_0_DATA_SIZE; 289 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), 290 (offset & ~0x0f000000) | (2 << 24)); 291 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); 292 293 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); 294 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), 295 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, 296 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 297 298 /* end of MC_RESUME */ 299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 300 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK); 301 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 302 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); 303 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 304 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); 305 306 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 307 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, 308 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); 309 310 /* clear BUSY flag */ 311 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 312 ~VCE_STATUS__JOB_BUSY_MASK, 0); 313 314 /* add end packet */ 315 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); 316 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; 317 header->vce_table_size = table_size; 318 } 319 320 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); 321 } 322 323 /** 324 * vce_v4_0_start - start VCE block 325 * 326 * @adev: amdgpu_device pointer 327 * 328 * Setup and start the VCE block 329 */ 330 static int vce_v4_0_start(struct amdgpu_device *adev) 331 { 332 struct amdgpu_ring *ring; 333 int r; 334 335 ring = &adev->vce.ring[0]; 336 337 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr)); 338 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr)); 339 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr); 340 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); 341 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); 342 343 ring = &adev->vce.ring[1]; 344 345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr)); 346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr)); 347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr); 348 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); 349 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4); 350 351 ring = &adev->vce.ring[2]; 352 353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr)); 354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr)); 355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr); 356 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr)); 357 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4); 358 359 vce_v4_0_mc_resume(adev); 360 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK, 361 ~VCE_STATUS__JOB_BUSY_MASK); 362 363 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001); 364 365 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, 366 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 367 mdelay(100); 368 369 r = vce_v4_0_firmware_loaded(adev); 370 371 /* clear BUSY flag */ 372 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); 373 374 if (r) { 375 DRM_ERROR("VCE not responding, giving up!!!\n"); 376 return r; 377 } 378 379 return 0; 380 } 381 382 static int vce_v4_0_stop(struct amdgpu_device *adev) 383 { 384 385 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); 386 387 /* hold on ECPU */ 388 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 389 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 390 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 391 392 /* clear BUSY flag */ 393 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); 394 395 /* Set Clock-Gating off */ 396 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) 397 vce_v4_0_set_vce_sw_clock_gating(adev, false); 398 */ 399 400 return 0; 401 } 402 403 static int vce_v4_0_early_init(void *handle) 404 { 405 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 406 407 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ 408 adev->vce.num_rings = 1; 409 else 410 adev->vce.num_rings = 3; 411 412 vce_v4_0_set_ring_funcs(adev); 413 vce_v4_0_set_irq_funcs(adev); 414 415 return 0; 416 } 417 418 static int vce_v4_0_sw_init(void *handle) 419 { 420 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 421 struct amdgpu_ring *ring; 422 423 unsigned size; 424 int r, i; 425 426 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq); 427 if (r) 428 return r; 429 430 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE; 431 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 432 size += VCE_V4_0_FW_SIZE; 433 434 r = amdgpu_vce_sw_init(adev, size); 435 if (r) 436 return r; 437 438 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 439 const struct common_firmware_header *hdr; 440 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 441 442 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); 443 if (!adev->vce.saved_bo) 444 return -ENOMEM; 445 446 hdr = (const struct common_firmware_header *)adev->vce.fw->data; 447 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; 448 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; 449 adev->firmware.fw_size += 450 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); 451 DRM_INFO("PSP loading VCE firmware\n"); 452 } else { 453 r = amdgpu_vce_resume(adev); 454 if (r) 455 return r; 456 } 457 458 for (i = 0; i < adev->vce.num_rings; i++) { 459 ring = &adev->vce.ring[i]; 460 sprintf(ring->name, "vce%d", i); 461 if (amdgpu_sriov_vf(adev)) { 462 /* DOORBELL only works under SRIOV */ 463 ring->use_doorbell = true; 464 465 /* currently only use the first encoding ring for sriov, 466 * so set unused location for other unused rings. 467 */ 468 if (i == 0) 469 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; 470 else 471 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; 472 } 473 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); 474 if (r) 475 return r; 476 } 477 478 479 r = amdgpu_vce_entity_init(adev); 480 if (r) 481 return r; 482 483 r = amdgpu_virt_alloc_mm_table(adev); 484 if (r) 485 return r; 486 487 return r; 488 } 489 490 static int vce_v4_0_sw_fini(void *handle) 491 { 492 int r; 493 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 494 495 /* free MM table */ 496 amdgpu_virt_free_mm_table(adev); 497 498 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 499 kvfree(adev->vce.saved_bo); 500 adev->vce.saved_bo = NULL; 501 } 502 503 r = amdgpu_vce_suspend(adev); 504 if (r) 505 return r; 506 507 return amdgpu_vce_sw_fini(adev); 508 } 509 510 static int vce_v4_0_hw_init(void *handle) 511 { 512 int r, i; 513 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 514 515 if (amdgpu_sriov_vf(adev)) 516 r = vce_v4_0_sriov_start(adev); 517 else 518 r = vce_v4_0_start(adev); 519 if (r) 520 return r; 521 522 for (i = 0; i < adev->vce.num_rings; i++) 523 adev->vce.ring[i].ready = false; 524 525 for (i = 0; i < adev->vce.num_rings; i++) { 526 r = amdgpu_ring_test_ring(&adev->vce.ring[i]); 527 if (r) 528 return r; 529 else 530 adev->vce.ring[i].ready = true; 531 } 532 533 DRM_INFO("VCE initialized successfully.\n"); 534 535 return 0; 536 } 537 538 static int vce_v4_0_hw_fini(void *handle) 539 { 540 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 541 int i; 542 543 if (!amdgpu_sriov_vf(adev)) { 544 /* vce_v4_0_wait_for_idle(handle); */ 545 vce_v4_0_stop(adev); 546 } else { 547 /* full access mode, so don't touch any VCE register */ 548 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); 549 } 550 551 for (i = 0; i < adev->vce.num_rings; i++) 552 adev->vce.ring[i].ready = false; 553 554 return 0; 555 } 556 557 static int vce_v4_0_suspend(void *handle) 558 { 559 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 560 int r; 561 562 if (adev->vce.vcpu_bo == NULL) 563 return 0; 564 565 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 566 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 567 void *ptr = adev->vce.cpu_addr; 568 569 memcpy_fromio(adev->vce.saved_bo, ptr, size); 570 } 571 572 r = vce_v4_0_hw_fini(adev); 573 if (r) 574 return r; 575 576 return amdgpu_vce_suspend(adev); 577 } 578 579 static int vce_v4_0_resume(void *handle) 580 { 581 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 582 int r; 583 584 if (adev->vce.vcpu_bo == NULL) 585 return -EINVAL; 586 587 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 588 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 589 void *ptr = adev->vce.cpu_addr; 590 591 memcpy_toio(ptr, adev->vce.saved_bo, size); 592 } else { 593 r = amdgpu_vce_resume(adev); 594 if (r) 595 return r; 596 } 597 598 return vce_v4_0_hw_init(adev); 599 } 600 601 static void vce_v4_0_mc_resume(struct amdgpu_device *adev) 602 { 603 uint32_t offset, size; 604 uint64_t tmr_mc_addr; 605 606 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); 607 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); 608 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F); 609 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); 610 611 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000); 612 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1); 613 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); 614 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); 615 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); 616 617 offset = AMDGPU_VCE_FIRMWARE_OFFSET; 618 619 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 620 tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 | 621 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; 622 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 623 (tmr_mc_addr >> 8)); 624 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), 625 (tmr_mc_addr >> 40) & 0xff); 626 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); 627 } else { 628 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 629 (adev->vce.gpu_addr >> 8)); 630 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), 631 (adev->vce.gpu_addr >> 40) & 0xff); 632 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); 633 } 634 635 size = VCE_V4_0_FW_SIZE; 636 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); 637 638 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8)); 639 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff); 640 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; 641 size = VCE_V4_0_STACK_SIZE; 642 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24)); 643 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); 644 645 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8)); 646 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff); 647 offset += size; 648 size = VCE_V4_0_DATA_SIZE; 649 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24)); 650 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); 651 652 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100); 653 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), 654 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, 655 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 656 } 657 658 static int vce_v4_0_set_clockgating_state(void *handle, 659 enum amd_clockgating_state state) 660 { 661 /* needed for driver unload*/ 662 return 0; 663 } 664 665 #if 0 666 static bool vce_v4_0_is_idle(void *handle) 667 { 668 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 669 u32 mask = 0; 670 671 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK; 672 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK; 673 674 return !(RREG32(mmSRBM_STATUS2) & mask); 675 } 676 677 static int vce_v4_0_wait_for_idle(void *handle) 678 { 679 unsigned i; 680 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 681 682 for (i = 0; i < adev->usec_timeout; i++) 683 if (vce_v4_0_is_idle(handle)) 684 return 0; 685 686 return -ETIMEDOUT; 687 } 688 689 #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */ 690 #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */ 691 #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */ 692 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ 693 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) 694 695 static bool vce_v4_0_check_soft_reset(void *handle) 696 { 697 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 698 u32 srbm_soft_reset = 0; 699 700 /* According to VCE team , we should use VCE_STATUS instead 701 * SRBM_STATUS.VCE_BUSY bit for busy status checking. 702 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE 703 * instance's registers are accessed 704 * (0 for 1st instance, 10 for 2nd instance). 705 * 706 *VCE_STATUS 707 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB | 708 *|----+----+-----------+----+----+----+----------+---------+----| 709 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0| 710 * 711 * VCE team suggest use bit 3--bit 6 for busy status check 712 */ 713 mutex_lock(&adev->grbm_idx_mutex); 714 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); 715 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { 716 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); 717 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); 718 } 719 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10); 720 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { 721 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); 722 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); 723 } 724 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); 725 mutex_unlock(&adev->grbm_idx_mutex); 726 727 if (srbm_soft_reset) { 728 adev->vce.srbm_soft_reset = srbm_soft_reset; 729 return true; 730 } else { 731 adev->vce.srbm_soft_reset = 0; 732 return false; 733 } 734 } 735 736 static int vce_v4_0_soft_reset(void *handle) 737 { 738 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 739 u32 srbm_soft_reset; 740 741 if (!adev->vce.srbm_soft_reset) 742 return 0; 743 srbm_soft_reset = adev->vce.srbm_soft_reset; 744 745 if (srbm_soft_reset) { 746 u32 tmp; 747 748 tmp = RREG32(mmSRBM_SOFT_RESET); 749 tmp |= srbm_soft_reset; 750 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 751 WREG32(mmSRBM_SOFT_RESET, tmp); 752 tmp = RREG32(mmSRBM_SOFT_RESET); 753 754 udelay(50); 755 756 tmp &= ~srbm_soft_reset; 757 WREG32(mmSRBM_SOFT_RESET, tmp); 758 tmp = RREG32(mmSRBM_SOFT_RESET); 759 760 /* Wait a little for things to settle down */ 761 udelay(50); 762 } 763 764 return 0; 765 } 766 767 static int vce_v4_0_pre_soft_reset(void *handle) 768 { 769 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 770 771 if (!adev->vce.srbm_soft_reset) 772 return 0; 773 774 mdelay(5); 775 776 return vce_v4_0_suspend(adev); 777 } 778 779 780 static int vce_v4_0_post_soft_reset(void *handle) 781 { 782 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 783 784 if (!adev->vce.srbm_soft_reset) 785 return 0; 786 787 mdelay(5); 788 789 return vce_v4_0_resume(adev); 790 } 791 792 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) 793 { 794 u32 tmp, data; 795 796 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL)); 797 if (override) 798 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; 799 else 800 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; 801 802 if (tmp != data) 803 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data); 804 } 805 806 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, 807 bool gated) 808 { 809 u32 data; 810 811 /* Set Override to disable Clock Gating */ 812 vce_v4_0_override_vce_clock_gating(adev, true); 813 814 /* This function enables MGCG which is controlled by firmware. 815 With the clocks in the gated state the core is still 816 accessible but the firmware will throttle the clocks on the 817 fly as necessary. 818 */ 819 if (gated) { 820 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); 821 data |= 0x1ff; 822 data &= ~0xef0000; 823 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); 824 825 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); 826 data |= 0x3ff000; 827 data &= ~0xffc00000; 828 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); 829 830 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); 831 data |= 0x2; 832 data &= ~0x00010000; 833 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); 834 835 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); 836 data |= 0x37f; 837 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); 838 839 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); 840 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | 841 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | 842 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | 843 0x8; 844 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); 845 } else { 846 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); 847 data &= ~0x80010; 848 data |= 0xe70008; 849 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); 850 851 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); 852 data |= 0xffc00000; 853 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); 854 855 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); 856 data |= 0x10000; 857 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); 858 859 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); 860 data &= ~0xffc00000; 861 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); 862 863 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); 864 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | 865 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | 866 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | 867 0x8); 868 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); 869 } 870 vce_v4_0_override_vce_clock_gating(adev, false); 871 } 872 873 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) 874 { 875 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL); 876 877 if (enable) 878 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; 879 else 880 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; 881 882 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp); 883 } 884 885 static int vce_v4_0_set_clockgating_state(void *handle, 886 enum amd_clockgating_state state) 887 { 888 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 889 bool enable = (state == AMD_CG_STATE_GATE) ? true : false; 890 int i; 891 892 if ((adev->asic_type == CHIP_POLARIS10) || 893 (adev->asic_type == CHIP_TONGA) || 894 (adev->asic_type == CHIP_FIJI)) 895 vce_v4_0_set_bypass_mode(adev, enable); 896 897 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) 898 return 0; 899 900 mutex_lock(&adev->grbm_idx_mutex); 901 for (i = 0; i < 2; i++) { 902 /* Program VCE Instance 0 or 1 if not harvested */ 903 if (adev->vce.harvest_config & (1 << i)) 904 continue; 905 906 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i); 907 908 if (enable) { 909 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */ 910 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A); 911 data &= ~(0xf | 0xff0); 912 data |= ((0x0 << 0) | (0x04 << 4)); 913 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data); 914 915 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */ 916 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING); 917 data &= ~(0xf | 0xff0); 918 data |= ((0x0 << 0) | (0x04 << 4)); 919 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data); 920 } 921 922 vce_v4_0_set_vce_sw_clock_gating(adev, enable); 923 } 924 925 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); 926 mutex_unlock(&adev->grbm_idx_mutex); 927 928 return 0; 929 } 930 931 static int vce_v4_0_set_powergating_state(void *handle, 932 enum amd_powergating_state state) 933 { 934 /* This doesn't actually powergate the VCE block. 935 * That's done in the dpm code via the SMC. This 936 * just re-inits the block as necessary. The actual 937 * gating still happens in the dpm code. We should 938 * revisit this when there is a cleaner line between 939 * the smc and the hw blocks 940 */ 941 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 942 943 if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE)) 944 return 0; 945 946 if (state == AMD_PG_STATE_GATE) 947 /* XXX do we need a vce_v4_0_stop()? */ 948 return 0; 949 else 950 return vce_v4_0_start(adev); 951 } 952 #endif 953 954 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, 955 struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) 956 { 957 amdgpu_ring_write(ring, VCE_CMD_IB_VM); 958 amdgpu_ring_write(ring, vmid); 959 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); 960 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 961 amdgpu_ring_write(ring, ib->length_dw); 962 } 963 964 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 965 u64 seq, unsigned flags) 966 { 967 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 968 969 amdgpu_ring_write(ring, VCE_CMD_FENCE); 970 amdgpu_ring_write(ring, addr); 971 amdgpu_ring_write(ring, upper_32_bits(addr)); 972 amdgpu_ring_write(ring, seq); 973 amdgpu_ring_write(ring, VCE_CMD_TRAP); 974 } 975 976 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) 977 { 978 amdgpu_ring_write(ring, VCE_CMD_END); 979 } 980 981 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 982 uint32_t val, uint32_t mask) 983 { 984 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 985 amdgpu_ring_write(ring, reg << 2); 986 amdgpu_ring_write(ring, mask); 987 amdgpu_ring_write(ring, val); 988 } 989 990 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, 991 unsigned int vmid, uint64_t pd_addr) 992 { 993 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 994 995 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 996 997 /* wait for reg writes */ 998 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, 999 lower_32_bits(pd_addr), 0xffffffff); 1000 } 1001 1002 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring, 1003 uint32_t reg, uint32_t val) 1004 { 1005 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 1006 amdgpu_ring_write(ring, reg << 2); 1007 amdgpu_ring_write(ring, val); 1008 } 1009 1010 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, 1011 struct amdgpu_irq_src *source, 1012 unsigned type, 1013 enum amdgpu_interrupt_state state) 1014 { 1015 uint32_t val = 0; 1016 1017 if (!amdgpu_sriov_vf(adev)) { 1018 if (state == AMDGPU_IRQ_STATE_ENABLE) 1019 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; 1020 1021 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, 1022 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 1023 } 1024 return 0; 1025 } 1026 1027 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev, 1028 struct amdgpu_irq_src *source, 1029 struct amdgpu_iv_entry *entry) 1030 { 1031 DRM_DEBUG("IH: VCE\n"); 1032 1033 switch (entry->src_data[0]) { 1034 case 0: 1035 case 1: 1036 case 2: 1037 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); 1038 break; 1039 default: 1040 DRM_ERROR("Unhandled interrupt: %d %d\n", 1041 entry->src_id, entry->src_data[0]); 1042 break; 1043 } 1044 1045 return 0; 1046 } 1047 1048 const struct amd_ip_funcs vce_v4_0_ip_funcs = { 1049 .name = "vce_v4_0", 1050 .early_init = vce_v4_0_early_init, 1051 .late_init = NULL, 1052 .sw_init = vce_v4_0_sw_init, 1053 .sw_fini = vce_v4_0_sw_fini, 1054 .hw_init = vce_v4_0_hw_init, 1055 .hw_fini = vce_v4_0_hw_fini, 1056 .suspend = vce_v4_0_suspend, 1057 .resume = vce_v4_0_resume, 1058 .is_idle = NULL /* vce_v4_0_is_idle */, 1059 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */, 1060 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */, 1061 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */, 1062 .soft_reset = NULL /* vce_v4_0_soft_reset */, 1063 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, 1064 .set_clockgating_state = vce_v4_0_set_clockgating_state, 1065 .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, 1066 }; 1067 1068 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { 1069 .type = AMDGPU_RING_TYPE_VCE, 1070 .align_mask = 0x3f, 1071 .nop = VCE_CMD_NO_OP, 1072 .support_64bit_ptrs = false, 1073 .vmhub = AMDGPU_MMHUB, 1074 .get_rptr = vce_v4_0_ring_get_rptr, 1075 .get_wptr = vce_v4_0_ring_get_wptr, 1076 .set_wptr = vce_v4_0_ring_set_wptr, 1077 .parse_cs = amdgpu_vce_ring_parse_cs_vm, 1078 .emit_frame_size = 1079 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1080 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + 1081 4 + /* vce_v4_0_emit_vm_flush */ 1082 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1083 1, /* vce_v4_0_ring_insert_end */ 1084 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ 1085 .emit_ib = vce_v4_0_ring_emit_ib, 1086 .emit_vm_flush = vce_v4_0_emit_vm_flush, 1087 .emit_fence = vce_v4_0_ring_emit_fence, 1088 .test_ring = amdgpu_vce_ring_test_ring, 1089 .test_ib = amdgpu_vce_ring_test_ib, 1090 .insert_nop = amdgpu_ring_insert_nop, 1091 .insert_end = vce_v4_0_ring_insert_end, 1092 .pad_ib = amdgpu_ring_generic_pad_ib, 1093 .begin_use = amdgpu_vce_ring_begin_use, 1094 .end_use = amdgpu_vce_ring_end_use, 1095 .emit_wreg = vce_v4_0_emit_wreg, 1096 .emit_reg_wait = vce_v4_0_emit_reg_wait, 1097 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 1098 }; 1099 1100 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1101 { 1102 int i; 1103 1104 for (i = 0; i < adev->vce.num_rings; i++) { 1105 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; 1106 adev->vce.ring[i].me = i; 1107 } 1108 DRM_INFO("VCE enabled in VM mode\n"); 1109 } 1110 1111 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { 1112 .set = vce_v4_0_set_interrupt_state, 1113 .process = vce_v4_0_process_interrupt, 1114 }; 1115 1116 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev) 1117 { 1118 adev->vce.irq.num_types = 1; 1119 adev->vce.irq.funcs = &vce_v4_0_irq_funcs; 1120 }; 1121 1122 const struct amdgpu_ip_block_version vce_v4_0_ip_block = 1123 { 1124 .type = AMD_IP_BLOCK_TYPE_VCE, 1125 .major = 4, 1126 .minor = 0, 1127 .rev = 0, 1128 .funcs = &vce_v4_0_ip_funcs, 1129 }; 1130