/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
                                          uint32_t *init_table,
                                          uint32_t reg_offset,
                                          uint32_t value)
{
        direct_wt->cmd_header.reg_offset = reg_offset;
        direct_wt->reg_value = value;
        memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
}

static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
                                                 uint32_t *init_table,
                                                 uint32_t reg_offset,
                                                 uint32_t mask, uint32_t data)
{
        direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
        direct_rd_mod_wt->mask_value = mask;
        direct_rd_mod_wt->write_data = data;
        memcpy((void *)init_table, direct_rd_mod_wt,
               sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
}

static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
                                            uint32_t *init_table,
                                            uint32_t reg_offset,
                                            uint32_t mask, uint32_t wait)
{
        direct_poll->cmd_header.reg_offset = reg_offset;
        direct_poll->mask_value = mask;
        direct_poll->wait_value = wait;
        memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
}
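
/*
 * The INSERT_DIRECT_* macros below serialize one MMSCH command each into
 * the init table: the helper above fills in the command payload, and the
 * macro then advances the local init_table write cursor and the
 * table_size counter by the command size in dwords.  They rely on
 * init_table and table_size being in scope at the call site (see
 * vce_v4_0_sriov_start()).
 */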
#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
        mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
                                      init_table, (reg), \
                                      (mask), (data)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
}

#define INSERT_DIRECT_WT(reg, value) { \
        mmsch_insert_direct_wt(&direct_wt, \
                               init_table, (reg), \
                               (value)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
}

#define INSERT_DIRECT_POLL(reg, mask, wait) { \
        mmsch_insert_direct_poll(&direct_poll, \
                                 init_table, (reg), \
                                 (mask), (wait)); \
        init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
        table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
}

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring == &adev->vce.ring[0])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring == &adev->vce.ring[1])
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring == &adev->vce.ring[0])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                       lower_32_bits(ring->wptr));
        else if (ring == &adev->vce.ring[1])
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                       lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                       lower_32_bits(ring->wptr));
}

/*
 * Wait for the VCE firmware to report that it has loaded; if it does not
 * respond, try to recover by soft-resetting the ECPU and polling again.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                         VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
        }

        return -ETIMEDOUT;
}
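
/**
 * vce_v4_0_mmsch_start - kick off MMSCH initialization under SR-IOV
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table holding the register init sequence
 *
 * Hand the GPU address and size of the init table to the MM scheduler
 * (MMSCH), trigger the initialization through the VF mailbox and poll
 * the response register until the MMSCH acknowledges completion.
 */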
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}
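
/**
 * vce_v4_0_sriov_start - start VCE via the MM scheduler (SR-IOV path)
 *
 * @adev: amdgpu_device pointer
 *
 * Instead of programming the registers directly, build a table of
 * write/read-modify-write/poll commands that mirrors vce_v4_0_start()
 * and vce_v4_0_mc_resume(), append it to the shared MMSCH init table
 * and let the MM scheduler replay it on behalf of the virtual function.
 */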
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

                /* BEGIN OF MC_RESUME */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                size = VCE_V4_0_FW_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset += size;
                size = VCE_V4_0_STACK_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
                INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                        0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                        ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                   VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                   VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                        ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;

                return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
        }

        return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                 ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only one VCE ring is supported under SR-IOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}
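
/*
 * sw_init: register the VCE interrupt source, allocate the firmware/stack/
 * data BO, set up the rings (doorbell based under SR-IOV) and, for SR-IOV,
 * allocate the page used as the shared MMSCH init table.
 */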
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;
        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        }

        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;
                        if (i == 0)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
                        else if (i == 1)
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
                        else
                                ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
                if (r)
                        return r;
        }

        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                            AMDGPU_GEM_DOMAIN_VRAM,
                                            &adev->virt.mm_table.bo,
                                            &adev->virt.mm_table.gpu_addr,
                                            (void *)&adev->virt.mm_table.cpu_addr);
                if (!r) {
                        memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
                        printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n",
                               adev->virt.mm_table.gpu_addr,
                               adev->virt.mm_table.cpu_addr);
                }
                return r;
        }

        return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        if (amdgpu_sriov_vf(adev))
                amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
                                      &adev->virt.mm_table.gpu_addr,
                                      (void *)&adev->virt.mm_table.cpu_addr);

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        r = amdgpu_vce_sw_fini(adev);
        if (r)
                return r;

        return r;
}

static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
                if (r)
                        return r;
                else
                        adev->vce.ring[i].ready = true;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        /* vce_v4_0_wait_for_idle(handle); */
        vce_v4_0_stop(adev);
        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].ready = false;

        return 0;
}

static int vce_v4_0_suspend(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return r;
}

static int vce_v4_0_resume(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        r = amdgpu_vce_resume(adev);
        if (r)
                return r;

        r = vce_v4_0_hw_init(adev);
        if (r)
                return r;

        return r;
}
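
/*
 * mc_resume: program the memory controller interface with the locations
 * of the firmware image, stack and data segments inside the VCE BO (or,
 * when the PSP loads the ucode, with the address the PSP placed it at),
 * then enable the VCE system interrupt.
 */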
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                       (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                       (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                       (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                       (adev->vce.gpu_addr >> 40) & 0xff);
        }

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}
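
/*
 * The handlers below are currently compiled out; the corresponding
 * amd_ip_funcs entries are set to NULL further down.  They are kept as a
 * reference for idle/soft-reset/clock- and power-gating handling.
 */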
#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * SRBM_STATUS.VCE_BUSY for the busy status check.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 0x10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3-6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
           With the clocks in the gated state the core is still
           accessible but the firmware will throttle the clocks on the
           fly as necessary.
        */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
            (adev->asic_type == CHIP_TONGA) ||
            (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC. This
         * just re-inits the block as necessary. The actual
         * gating still happens in the dpm code. We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
                return 0;

        if (state == AMD_PG_STATE_GATE)
                /* XXX do we need a vce_v4_0_stop()? */
                return 0;
        else
                return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
                struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vm_id);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}
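
/*
 * emit_vm_flush: for every VM hub, write the new page directory base for
 * this vm_id into the hub's ctx0 page table base registers, wait until the
 * low half sticks, then request a TLB invalidation on this ring's engine
 * and wait for the acknowledge bit for the vm_id to be set.
 */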
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                         unsigned int vm_id, uint64_t pd_addr)
{
        unsigned eng = ring->idx;
        unsigned i;

        pd_addr = pd_addr | 0x1; /* valid bit */
        /* now only use physical base address of PDE and valid */
        BUG_ON(pd_addr & 0xFFFF00000000003EULL);

        for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
                struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
                uint32_t req = hub->get_invalidate_req(vm_id);

                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, upper_32_bits(pd_addr));

                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, lower_32_bits(pd_addr));

                amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
                amdgpu_ring_write(ring,
                        (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
                amdgpu_ring_write(ring, 0xffffffff);
                amdgpu_ring_write(ring, lower_32_bits(pd_addr));

                /* flush TLB */
                amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
                amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
                amdgpu_ring_write(ring, req);

                /* wait for flush */
                amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
                amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
                amdgpu_ring_write(ring, 1 << vm_id);
                amdgpu_ring_write(ring, 1 << vm_id);
        }
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (state == AMDGPU_IRQ_STATE_ENABLE)
                val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
                 VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
                 ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};