/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

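	/* Under SRIOV the rings are doorbell driven (use_doorbell is set in
	 * vce_v4_0_sw_init): mirror the write pointer into the writeback
	 * slot and ring the doorbell instead of writing the VCE_RB_WPTR*
	 * registers below.
	 */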
	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
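		/* enable the system trap interrupt here as well; this mirrors
		 * what the bare-metal path programs in vce_v4_0_mc_resume()
		 */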
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
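	/* asserting ECPU_SOFT_RESET keeps the embedded CPU in reset;
	 * vce_v4_0_start() releases it again on the next start
	 */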
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ?
		offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L	/* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L	/* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L	/* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3-6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ?
		true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
		 VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		 ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};