/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
		       lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
		       lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
		       lower_32_bits(ring->wptr));
}

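/**
 * vce_v4_0_firmware_loaded - poll for VCPU firmware readiness
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the firmware reports itself as loaded,
 * soft-resetting the ECPU between retry rounds.
 * Returns 0 on success, -ETIMEDOUT if the firmware never comes up.
 */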
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

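/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler under SR-IOV
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table to hand to the MMSCH
 *
 * Programs the MMSCH VF context registers with the descriptor table
 * location and size, rings the init mailbox and waits for the response.
 * Returns 0 on success, -EBUSY if the MMSCH does not acknowledge.
 */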
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);

	return 0;
}

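/**
 * vce_v4_0_sriov_start - start VCE through the MM scheduler
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table mirroring the bare-metal MC resume and
 * start sequence, then hands it to vce_v4_0_mmsch_start() so the host
 * programs the engine on behalf of the VF.
 */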
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized ? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

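/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in soft reset and clears
 * the BUSY flag in VCE_STATUS.
 */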
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE ring0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

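/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the firmware/stack/data
 * BO, sets up the rings (doorbell based under SR-IOV) and the MM table.
 */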
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

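/**
 * vce_v4_0_hw_init - start and test VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the engine (through the MMSCH when running as an SR-IOV VF)
 * and runs the ring test on every enabled ring.
 */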
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v4_0_hw_init(adev);
}

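/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Disables clock gating overrides and programs the LMI and VCPU cache
 * registers with the firmware, stack and data offsets.
 */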
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

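/*
 * The idle, soft-reset and clock/power-gating helpers below are currently
 * compiled out; the matching hooks in vce_v4_0_ip_funcs are left NULL.
 */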
#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * SRBM_STATUS.VCE_BUSY for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * VCE team suggests using bits 3--6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

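/*
 * MGCG helpers: the CGTT override bit keeps the clocks ungated while the
 * gating registers are reprogrammed; with gating enabled the firmware
 * throttles the clocks on the fly (see the comment in
 * vce_v4_0_set_vce_sw_clock_gating() below).
 */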
static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

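/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to use
 * @ctx_switch: unused for VCE
 *
 * Emits a VCE_CMD_IB_VM packet pointing the engine at the IB.
 */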
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
		~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

.name = "vce_v4_0", 1000 .early_init = vce_v4_0_early_init, 1001 .late_init = NULL, 1002 .sw_init = vce_v4_0_sw_init, 1003 .sw_fini = vce_v4_0_sw_fini, 1004 .hw_init = vce_v4_0_hw_init, 1005 .hw_fini = vce_v4_0_hw_fini, 1006 .suspend = vce_v4_0_suspend, 1007 .resume = vce_v4_0_resume, 1008 .is_idle = NULL /* vce_v4_0_is_idle */, 1009 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */, 1010 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */, 1011 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */, 1012 .soft_reset = NULL /* vce_v4_0_soft_reset */, 1013 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, 1014 .set_clockgating_state = vce_v4_0_set_clockgating_state, 1015 .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, 1016 }; 1017 1018 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { 1019 .type = AMDGPU_RING_TYPE_VCE, 1020 .align_mask = 0x3f, 1021 .nop = VCE_CMD_NO_OP, 1022 .support_64bit_ptrs = false, 1023 .vmhub = AMDGPU_MMHUB, 1024 .get_rptr = vce_v4_0_ring_get_rptr, 1025 .get_wptr = vce_v4_0_ring_get_wptr, 1026 .set_wptr = vce_v4_0_ring_set_wptr, 1027 .parse_cs = amdgpu_vce_ring_parse_cs_vm, 1028 .emit_frame_size = 1029 17 + /* vce_v4_0_emit_vm_flush */ 1030 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1031 1, /* vce_v4_0_ring_insert_end */ 1032 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ 1033 .emit_ib = vce_v4_0_ring_emit_ib, 1034 .emit_vm_flush = vce_v4_0_emit_vm_flush, 1035 .emit_fence = vce_v4_0_ring_emit_fence, 1036 .test_ring = amdgpu_vce_ring_test_ring, 1037 .test_ib = amdgpu_vce_ring_test_ib, 1038 .insert_nop = amdgpu_ring_insert_nop, 1039 .insert_end = vce_v4_0_ring_insert_end, 1040 .pad_ib = amdgpu_ring_generic_pad_ib, 1041 .begin_use = amdgpu_vce_ring_begin_use, 1042 .end_use = amdgpu_vce_ring_end_use, 1043 }; 1044 1045 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1046 { 1047 int i; 1048 1049 for (i = 0; i < adev->vce.num_rings; i++) 1050 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; 1051 DRM_INFO("VCE enabled in VM mode\n"); 1052 } 1053 1054 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { 1055 .set = vce_v4_0_set_interrupt_state, 1056 .process = vce_v4_0_process_interrupt, 1057 }; 1058 1059 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev) 1060 { 1061 adev->vce.irq.num_types = 1; 1062 adev->vce.irq.funcs = &vce_v4_0_irq_funcs; 1063 }; 1064 1065 const struct amdgpu_ip_block_version vce_v4_0_ip_block = 1066 { 1067 .type = AMD_IP_BLOCK_TYPE_VCE, 1068 .major = 4, 1069 .minor = 0, 1070 .rev = 0, 1071 .funcs = &vce_v4_0_ip_funcs, 1072 }; 1073