/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
		       lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
		       lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
		       lower_32_bits(ring->wptr));
}
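
/**
 * vce_v4_0_firmware_loaded - poll for VCPU firmware ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the firmware reports it has loaded, toggling the
 * ECPU soft reset between retries.  Returns 0 on success, -ETIMEDOUT if the
 * firmware never comes up.
 */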
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
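
/**
 * vce_v4_0_sriov_start - start VCE block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table for VCE (ring setup plus the MC_RESUME
 * register programming) and hands it to the MM scheduler firmware.
 */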
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & ~0x0f000000);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ?
			offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
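
/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Gates the VCPU clock, holds the ECPU in soft reset and clears the
 * busy flag so the block can be safely shut down.
 */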
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only the first VCE ring is used under SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}
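
/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the firmware/stack/data
 * BO, initializes the rings and, for SR-IOV, the MMSCH memory table.
 */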
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SR-IOV */
			ring->use_doorbell = true;

			/* currently only the first encoding ring is used for SR-IOV,
			 * so park the other, unused rings on spare doorbell slots.
			 */
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
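
/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Tells the VCE memory controller where the firmware, stack and data
 * regions live and enables the system interrupt.
 */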
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}
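
/* The helpers below (idle checks, soft reset and clock/power gating) are
 * currently compiled out; the corresponding amd_ip_funcs callbacks further
 * down are left NULL.
 */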
#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK	(VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
					 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif
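
/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VM id of the IB
 *
 * Write the ring commands to execute the indirect buffer
 */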
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};