1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: Alex Deucher 23 */ 24 #include <linux/firmware.h> 25 #include <drm/drmP.h> 26 #include "amdgpu.h" 27 #include "amdgpu_ucode.h" 28 #include "amdgpu_trace.h" 29 #include "vi.h" 30 #include "vid.h" 31 32 #include "oss/oss_2_4_d.h" 33 #include "oss/oss_2_4_sh_mask.h" 34 35 #include "gmc/gmc_7_1_d.h" 36 #include "gmc/gmc_7_1_sh_mask.h" 37 38 #include "gca/gfx_8_0_d.h" 39 #include "gca/gfx_8_0_enum.h" 40 #include "gca/gfx_8_0_sh_mask.h" 41 42 #include "bif/bif_5_0_d.h" 43 #include "bif/bif_5_0_sh_mask.h" 44 45 #include "iceland_sdma_pkt_open.h" 46 47 #include "ivsrcid/ivsrcid_vislands30.h" 48 49 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev); 50 static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev); 51 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev); 52 static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev); 53 54 MODULE_FIRMWARE("amdgpu/topaz_sdma.bin"); 55 MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin"); 56 57 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 58 { 59 SDMA0_REGISTER_OFFSET, 60 SDMA1_REGISTER_OFFSET 61 }; 62 63 static const u32 golden_settings_iceland_a11[] = 64 { 65 mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, 66 mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, 67 mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, 68 mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, 69 }; 70 71 static const u32 iceland_mgcg_cgcg_init[] = 72 { 73 mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100, 74 mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 75 }; 76 77 /* 78 * sDMA - System DMA 79 * Starting with CIK, the GPU has new asynchronous 80 * DMA engines. These engines are used for compute 81 * and gfx. There are two DMA engines (SDMA0, SDMA1) 82 * and each one supports 1 ring buffer used for gfx 83 * and 2 queues used for compute. 84 * 85 * The programming model is very similar to the CP 86 * (ring buffer, IBs, etc.), but sDMA has it's own 87 * packet format that is different from the PM4 format 88 * used by the CP. 
sDMA supports copying data, writing 89 * embedded data, solid fills, and a number of other 90 * things. It also has support for tiling/detiling of 91 * buffers. 92 */ 93 94 static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) 95 { 96 switch (adev->asic_type) { 97 case CHIP_TOPAZ: 98 amdgpu_device_program_register_sequence(adev, 99 iceland_mgcg_cgcg_init, 100 ARRAY_SIZE(iceland_mgcg_cgcg_init)); 101 amdgpu_device_program_register_sequence(adev, 102 golden_settings_iceland_a11, 103 ARRAY_SIZE(golden_settings_iceland_a11)); 104 break; 105 default: 106 break; 107 } 108 } 109 110 static void sdma_v2_4_free_microcode(struct amdgpu_device *adev) 111 { 112 int i; 113 for (i = 0; i < adev->sdma.num_instances; i++) { 114 release_firmware(adev->sdma.instance[i].fw); 115 adev->sdma.instance[i].fw = NULL; 116 } 117 } 118 119 /** 120 * sdma_v2_4_init_microcode - load ucode images from disk 121 * 122 * @adev: amdgpu_device pointer 123 * 124 * Use the firmware interface to load the ucode images into 125 * the driver (not loaded into hw). 126 * Returns 0 on success, error on failure. 
127 */ 128 static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) 129 { 130 const char *chip_name; 131 char fw_name[30]; 132 int err = 0, i; 133 struct amdgpu_firmware_info *info = NULL; 134 const struct common_firmware_header *header = NULL; 135 const struct sdma_firmware_header_v1_0 *hdr; 136 137 DRM_DEBUG("\n"); 138 139 switch (adev->asic_type) { 140 case CHIP_TOPAZ: 141 chip_name = "topaz"; 142 break; 143 default: BUG(); 144 } 145 146 for (i = 0; i < adev->sdma.num_instances; i++) { 147 if (i == 0) 148 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 149 else 150 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); 151 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); 152 if (err) 153 goto out; 154 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); 155 if (err) 156 goto out; 157 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 158 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 159 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 160 if (adev->sdma.instance[i].feature_version >= 20) 161 adev->sdma.instance[i].burst_nop = true; 162 163 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 164 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 165 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 166 info->fw = adev->sdma.instance[i].fw; 167 header = (const struct common_firmware_header *)info->fw->data; 168 adev->firmware.fw_size += 169 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 170 } 171 } 172 173 out: 174 if (err) { 175 pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name); 176 for (i = 0; i < adev->sdma.num_instances; i++) { 177 release_firmware(adev->sdma.instance[i].fw); 178 adev->sdma.instance[i].fw = NULL; 179 } 180 } 181 return err; 182 } 183 184 /** 185 * sdma_v2_4_ring_get_rptr - get the current read pointer 186 * 187 * @ring: amdgpu ring pointer 188 * 189 * Get the 
current rptr from the hardware (VI+). 190 */ 191 static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) 192 { 193 /* XXX check if swapping is necessary on BE */ 194 return ring->adev->wb.wb[ring->rptr_offs] >> 2; 195 } 196 197 /** 198 * sdma_v2_4_ring_get_wptr - get the current write pointer 199 * 200 * @ring: amdgpu ring pointer 201 * 202 * Get the current wptr from the hardware (VI+). 203 */ 204 static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) 205 { 206 struct amdgpu_device *adev = ring->adev; 207 u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2; 208 209 return wptr; 210 } 211 212 /** 213 * sdma_v2_4_ring_set_wptr - commit the write pointer 214 * 215 * @ring: amdgpu ring pointer 216 * 217 * Write the wptr back to the hardware (VI+). 218 */ 219 static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) 220 { 221 struct amdgpu_device *adev = ring->adev; 222 223 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2); 224 } 225 226 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 227 { 228 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 229 int i; 230 231 for (i = 0; i < count; i++) 232 if (sdma && sdma->burst_nop && (i == 0)) 233 amdgpu_ring_write(ring, ring->funcs->nop | 234 SDMA_PKT_NOP_HEADER_COUNT(count - 1)); 235 else 236 amdgpu_ring_write(ring, ring->funcs->nop); 237 } 238 239 /** 240 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine 241 * 242 * @ring: amdgpu ring pointer 243 * @ib: IB object to schedule 244 * 245 * Schedule an IB in the DMA ring (VI). 
246 */ 247 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, 248 struct amdgpu_ib *ib, 249 unsigned vmid, bool ctx_switch) 250 { 251 /* IB packet must end on a 8 DW boundary */ 252 sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 253 254 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 255 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); 256 /* base must be 32 byte aligned */ 257 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 258 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 259 amdgpu_ring_write(ring, ib->length_dw); 260 amdgpu_ring_write(ring, 0); 261 amdgpu_ring_write(ring, 0); 262 263 } 264 265 /** 266 * sdma_v2_4_hdp_flush_ring_emit - emit an hdp flush on the DMA ring 267 * 268 * @ring: amdgpu ring pointer 269 * 270 * Emit an hdp flush packet on the requested DMA ring. 271 */ 272 static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring) 273 { 274 u32 ref_and_mask = 0; 275 276 if (ring->me == 0) 277 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1); 278 else 279 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); 280 281 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 282 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | 283 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ 284 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2); 285 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2); 286 amdgpu_ring_write(ring, ref_and_mask); /* reference */ 287 amdgpu_ring_write(ring, ref_and_mask); /* mask */ 288 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 289 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 290 } 291 292 /** 293 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring 294 * 295 * @ring: amdgpu ring pointer 296 * @fence: amdgpu fence object 297 * 298 * Add a DMA fence packet to the ring to write 299 * the fence seq number and DMA trap packet to generate 300 * an interrupt 
if needed (VI).
 */
static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	/* write the low 32 bits of the fence value */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write the high bits to the following dword */
	if (flags & AMDGPU_FENCE_FLAG_64BIT) {
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr + 4));
		amdgpu_ring_write(ring, upper_32_bits(addr + 4));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
332 */ 333 static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) 334 { 335 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; 336 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; 337 u32 rb_cntl, ib_cntl; 338 int i; 339 340 if ((adev->mman.buffer_funcs_ring == sdma0) || 341 (adev->mman.buffer_funcs_ring == sdma1)) 342 amdgpu_ttm_set_buffer_funcs_status(adev, false); 343 344 for (i = 0; i < adev->sdma.num_instances; i++) { 345 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 346 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 347 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 348 ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]); 349 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 350 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 351 } 352 sdma0->ready = false; 353 sdma1->ready = false; 354 } 355 356 /** 357 * sdma_v2_4_rlc_stop - stop the compute async dma engines 358 * 359 * @adev: amdgpu_device pointer 360 * 361 * Stop the compute async dma queues (VI). 362 */ 363 static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev) 364 { 365 /* XXX todo */ 366 } 367 368 /** 369 * sdma_v2_4_enable - stop the async dma engines 370 * 371 * @adev: amdgpu_device pointer 372 * @enable: enable/disable the DMA MEs. 373 * 374 * Halt or unhalt the async dma engines (VI). 
375 */ 376 static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable) 377 { 378 u32 f32_cntl; 379 int i; 380 381 if (!enable) { 382 sdma_v2_4_gfx_stop(adev); 383 sdma_v2_4_rlc_stop(adev); 384 } 385 386 for (i = 0; i < adev->sdma.num_instances; i++) { 387 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]); 388 if (enable) 389 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0); 390 else 391 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); 392 WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl); 393 } 394 } 395 396 /** 397 * sdma_v2_4_gfx_resume - setup and start the async dma engines 398 * 399 * @adev: amdgpu_device pointer 400 * 401 * Set up the gfx DMA ring buffers and enable them (VI). 402 * Returns 0 for success, error for failure. 403 */ 404 static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) 405 { 406 struct amdgpu_ring *ring; 407 u32 rb_cntl, ib_cntl; 408 u32 rb_bufsz; 409 u32 wb_offset; 410 int i, j, r; 411 412 for (i = 0; i < adev->sdma.num_instances; i++) { 413 ring = &adev->sdma.instance[i].ring; 414 wb_offset = (ring->rptr_offs * 4); 415 416 mutex_lock(&adev->srbm_mutex); 417 for (j = 0; j < 16; j++) { 418 vi_srbm_select(adev, 0, 0, 0, j); 419 /* SDMA GFX */ 420 WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0); 421 WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0); 422 } 423 vi_srbm_select(adev, 0, 0, 0, 0); 424 mutex_unlock(&adev->srbm_mutex); 425 426 WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], 427 adev->gfx.config.gb_addr_config & 0x70); 428 429 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 430 431 /* Set ring buffer size in dwords */ 432 rb_bufsz = order_base_2(ring->ring_size / 4); 433 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 434 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); 435 #ifdef __BIG_ENDIAN 436 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); 437 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 438 
RPTR_WRITEBACK_SWAP_ENABLE, 1); 439 #endif 440 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 441 442 /* Initialize the ring buffer's read and write pointers */ 443 WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); 444 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); 445 WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); 446 WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); 447 448 /* set the wb address whether it's enabled or not */ 449 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], 450 upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 451 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], 452 lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); 453 454 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 455 456 WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 457 WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); 458 459 ring->wptr = 0; 460 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); 461 462 /* enable DMA RB */ 463 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 464 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 465 466 ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]); 467 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); 468 #ifdef __BIG_ENDIAN 469 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); 470 #endif 471 /* enable DMA IBs */ 472 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 473 474 ring->ready = true; 475 } 476 477 sdma_v2_4_enable(adev, true); 478 for (i = 0; i < adev->sdma.num_instances; i++) { 479 ring = &adev->sdma.instance[i].ring; 480 r = amdgpu_ring_test_ring(ring); 481 if (r) { 482 ring->ready = false; 483 return r; 484 } 485 486 if (adev->mman.buffer_funcs_ring == ring) 487 amdgpu_ttm_set_buffer_funcs_status(adev, true); 488 } 489 490 return 0; 491 } 492 493 /** 494 * sdma_v2_4_rlc_resume - setup and start the async dma 
engines 495 * 496 * @adev: amdgpu_device pointer 497 * 498 * Set up the compute DMA queues and enable them (VI). 499 * Returns 0 for success, error for failure. 500 */ 501 static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev) 502 { 503 /* XXX todo */ 504 return 0; 505 } 506 507 508 /** 509 * sdma_v2_4_start - setup and start the async dma engines 510 * 511 * @adev: amdgpu_device pointer 512 * 513 * Set up the DMA engines and enable them (VI). 514 * Returns 0 for success, error for failure. 515 */ 516 static int sdma_v2_4_start(struct amdgpu_device *adev) 517 { 518 int r; 519 520 /* halt the engine before programing */ 521 sdma_v2_4_enable(adev, false); 522 523 /* start the gfx rings and rlc compute queues */ 524 r = sdma_v2_4_gfx_resume(adev); 525 if (r) 526 return r; 527 r = sdma_v2_4_rlc_resume(adev); 528 if (r) 529 return r; 530 531 return 0; 532 } 533 534 /** 535 * sdma_v2_4_ring_test_ring - simple async dma engine test 536 * 537 * @ring: amdgpu_ring structure holding ring information 538 * 539 * Test the DMA engine by writing using it to write an 540 * value to memory. (VI). 541 * Returns 0 for success, error for failure. 
542 */ 543 static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) 544 { 545 struct amdgpu_device *adev = ring->adev; 546 unsigned i; 547 unsigned index; 548 int r; 549 u32 tmp; 550 u64 gpu_addr; 551 552 r = amdgpu_device_wb_get(adev, &index); 553 if (r) { 554 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 555 return r; 556 } 557 558 gpu_addr = adev->wb.gpu_addr + (index * 4); 559 tmp = 0xCAFEDEAD; 560 adev->wb.wb[index] = cpu_to_le32(tmp); 561 562 r = amdgpu_ring_alloc(ring, 5); 563 if (r) { 564 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 565 amdgpu_device_wb_free(adev, index); 566 return r; 567 } 568 569 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 570 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 571 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 572 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 573 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); 574 amdgpu_ring_write(ring, 0xDEADBEEF); 575 amdgpu_ring_commit(ring); 576 577 for (i = 0; i < adev->usec_timeout; i++) { 578 tmp = le32_to_cpu(adev->wb.wb[index]); 579 if (tmp == 0xDEADBEEF) 580 break; 581 DRM_UDELAY(1); 582 } 583 584 if (i < adev->usec_timeout) { 585 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 586 } else { 587 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", 588 ring->idx, tmp); 589 r = -EINVAL; 590 } 591 amdgpu_device_wb_free(adev, index); 592 593 return r; 594 } 595 596 /** 597 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine 598 * 599 * @ring: amdgpu_ring structure holding ring information 600 * 601 * Test a simple IB in the DMA ring (VI). 602 * Returns 0 on success, error on failure. 
603 */ 604 static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) 605 { 606 struct amdgpu_device *adev = ring->adev; 607 struct amdgpu_ib ib; 608 struct dma_fence *f = NULL; 609 unsigned index; 610 u32 tmp = 0; 611 u64 gpu_addr; 612 long r; 613 614 r = amdgpu_device_wb_get(adev, &index); 615 if (r) { 616 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 617 return r; 618 } 619 620 gpu_addr = adev->wb.gpu_addr + (index * 4); 621 tmp = 0xCAFEDEAD; 622 adev->wb.wb[index] = cpu_to_le32(tmp); 623 memset(&ib, 0, sizeof(ib)); 624 r = amdgpu_ib_get(adev, NULL, 256, &ib); 625 if (r) { 626 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 627 goto err0; 628 } 629 630 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 631 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 632 ib.ptr[1] = lower_32_bits(gpu_addr); 633 ib.ptr[2] = upper_32_bits(gpu_addr); 634 ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1); 635 ib.ptr[4] = 0xDEADBEEF; 636 ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 637 ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 638 ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 639 ib.length_dw = 8; 640 641 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 642 if (r) 643 goto err1; 644 645 r = dma_fence_wait_timeout(f, false, timeout); 646 if (r == 0) { 647 DRM_ERROR("amdgpu: IB test timed out\n"); 648 r = -ETIMEDOUT; 649 goto err1; 650 } else if (r < 0) { 651 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 652 goto err1; 653 } 654 tmp = le32_to_cpu(adev->wb.wb[index]); 655 if (tmp == 0xDEADBEEF) { 656 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 657 r = 0; 658 } else { 659 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 660 r = -EINVAL; 661 } 662 663 err1: 664 amdgpu_ib_free(adev, &ib, NULL); 665 dma_fence_put(f); 666 err0: 667 amdgpu_device_wb_free(adev, index); 668 return r; 669 } 670 671 /** 672 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART 673 * 674 * @ib: indirect buffer to fill with commands 675 * @pe: 
addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (CIK).
 */
static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	/* linear copy packet: header, byte count, swap, src, dst */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = count * 8; /* 8 bytes per entry */
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA (CIK).
707 */ 708 static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 709 uint64_t value, unsigned count, 710 uint32_t incr) 711 { 712 unsigned ndw = count * 2; 713 714 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 715 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 716 ib->ptr[ib->length_dw++] = pe; 717 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 718 ib->ptr[ib->length_dw++] = ndw; 719 for (; ndw > 0; ndw -= 2) { 720 ib->ptr[ib->length_dw++] = lower_32_bits(value); 721 ib->ptr[ib->length_dw++] = upper_32_bits(value); 722 value += incr; 723 } 724 } 725 726 /** 727 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA 728 * 729 * @ib: indirect buffer to fill with commands 730 * @pe: addr of the page entry 731 * @addr: dst addr to write into pe 732 * @count: number of page entries to update 733 * @incr: increase next addr by incr bytes 734 * @flags: access flags 735 * 736 * Update the page tables using sDMA (CIK). 737 */ 738 static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, 739 uint64_t addr, unsigned count, 740 uint32_t incr, uint64_t flags) 741 { 742 /* for physically contiguous pages (vram) */ 743 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); 744 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ 745 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 746 ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 747 ib->ptr[ib->length_dw++] = upper_32_bits(flags); 748 ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ 749 ib->ptr[ib->length_dw++] = upper_32_bits(addr); 750 ib->ptr[ib->length_dw++] = incr; /* increment size */ 751 ib->ptr[ib->length_dw++] = 0; 752 ib->ptr[ib->length_dw++] = count; /* number of entries */ 753 } 754 755 /** 756 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw 757 * 758 * @ib: indirect buffer to fill with padding 759 * 760 */ 761 static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 762 { 763 
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 764 u32 pad_count; 765 int i; 766 767 pad_count = (8 - (ib->length_dw & 0x7)) % 8; 768 for (i = 0; i < pad_count; i++) 769 if (sdma && sdma->burst_nop && (i == 0)) 770 ib->ptr[ib->length_dw++] = 771 SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | 772 SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); 773 else 774 ib->ptr[ib->length_dw++] = 775 SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 776 } 777 778 /** 779 * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline 780 * 781 * @ring: amdgpu_ring pointer 782 * 783 * Make sure all previous operations are completed (CIK). 784 */ 785 static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 786 { 787 uint32_t seq = ring->fence_drv.sync_seq; 788 uint64_t addr = ring->fence_drv.gpu_addr; 789 790 /* wait for idle */ 791 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 792 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 793 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 794 SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 795 amdgpu_ring_write(ring, addr & 0xfffffffc); 796 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 797 amdgpu_ring_write(ring, seq); /* reference */ 798 amdgpu_ring_write(ring, 0xffffffff); /* mask */ 799 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 800 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 801 } 802 803 /** 804 * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA 805 * 806 * @ring: amdgpu_ring pointer 807 * @vm: amdgpu_vm pointer 808 * 809 * Update the page table base and flush the VM TLB 810 * using sDMA (VI). 
811 */ 812 static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, 813 unsigned vmid, uint64_t pd_addr) 814 { 815 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 816 817 /* wait for flush */ 818 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 819 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 820 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */ 821 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); 822 amdgpu_ring_write(ring, 0); 823 amdgpu_ring_write(ring, 0); /* reference */ 824 amdgpu_ring_write(ring, 0); /* mask */ 825 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 826 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 827 } 828 829 static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring, 830 uint32_t reg, uint32_t val) 831 { 832 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 833 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 834 amdgpu_ring_write(ring, reg); 835 amdgpu_ring_write(ring, val); 836 } 837 838 static int sdma_v2_4_early_init(void *handle) 839 { 840 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 841 842 adev->sdma.num_instances = SDMA_MAX_INSTANCE; 843 844 sdma_v2_4_set_ring_funcs(adev); 845 sdma_v2_4_set_buffer_funcs(adev); 846 sdma_v2_4_set_vm_pte_funcs(adev); 847 sdma_v2_4_set_irq_funcs(adev); 848 849 return 0; 850 } 851 852 static int sdma_v2_4_sw_init(void *handle) 853 { 854 struct amdgpu_ring *ring; 855 int r, i; 856 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 857 858 /* SDMA trap event */ 859 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, 860 &adev->sdma.trap_irq); 861 if (r) 862 return r; 863 864 /* SDMA Privileged inst */ 865 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, 866 &adev->sdma.illegal_inst_irq); 867 if (r) 868 return r; 869 870 /* SDMA Privileged inst */ 871 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, 
872 &adev->sdma.illegal_inst_irq); 873 if (r) 874 return r; 875 876 r = sdma_v2_4_init_microcode(adev); 877 if (r) { 878 DRM_ERROR("Failed to load sdma firmware!\n"); 879 return r; 880 } 881 882 for (i = 0; i < adev->sdma.num_instances; i++) { 883 ring = &adev->sdma.instance[i].ring; 884 ring->ring_obj = NULL; 885 ring->use_doorbell = false; 886 sprintf(ring->name, "sdma%d", i); 887 r = amdgpu_ring_init(adev, ring, 1024, 888 &adev->sdma.trap_irq, 889 (i == 0) ? 890 AMDGPU_SDMA_IRQ_TRAP0 : 891 AMDGPU_SDMA_IRQ_TRAP1); 892 if (r) 893 return r; 894 } 895 896 return r; 897 } 898 899 static int sdma_v2_4_sw_fini(void *handle) 900 { 901 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 902 int i; 903 904 for (i = 0; i < adev->sdma.num_instances; i++) 905 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 906 907 sdma_v2_4_free_microcode(adev); 908 return 0; 909 } 910 911 static int sdma_v2_4_hw_init(void *handle) 912 { 913 int r; 914 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 915 916 sdma_v2_4_init_golden_registers(adev); 917 918 r = sdma_v2_4_start(adev); 919 if (r) 920 return r; 921 922 return r; 923 } 924 925 static int sdma_v2_4_hw_fini(void *handle) 926 { 927 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 928 929 sdma_v2_4_enable(adev, false); 930 931 return 0; 932 } 933 934 static int sdma_v2_4_suspend(void *handle) 935 { 936 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 937 938 return sdma_v2_4_hw_fini(adev); 939 } 940 941 static int sdma_v2_4_resume(void *handle) 942 { 943 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 944 945 return sdma_v2_4_hw_init(adev); 946 } 947 948 static bool sdma_v2_4_is_idle(void *handle) 949 { 950 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 951 u32 tmp = RREG32(mmSRBM_STATUS2); 952 953 if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK | 954 SRBM_STATUS2__SDMA1_BUSY_MASK)) 955 return false; 956 957 return true; 958 } 959 960 static int 
sdma_v2_4_wait_for_idle(void *handle) 961 { 962 unsigned i; 963 u32 tmp; 964 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 965 966 for (i = 0; i < adev->usec_timeout; i++) { 967 tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | 968 SRBM_STATUS2__SDMA1_BUSY_MASK); 969 970 if (!tmp) 971 return 0; 972 udelay(1); 973 } 974 return -ETIMEDOUT; 975 } 976 977 static int sdma_v2_4_soft_reset(void *handle) 978 { 979 u32 srbm_soft_reset = 0; 980 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 981 u32 tmp = RREG32(mmSRBM_STATUS2); 982 983 if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { 984 /* sdma0 */ 985 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); 986 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); 987 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); 988 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; 989 } 990 if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { 991 /* sdma1 */ 992 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); 993 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); 994 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); 995 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; 996 } 997 998 if (srbm_soft_reset) { 999 tmp = RREG32(mmSRBM_SOFT_RESET); 1000 tmp |= srbm_soft_reset; 1001 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 1002 WREG32(mmSRBM_SOFT_RESET, tmp); 1003 tmp = RREG32(mmSRBM_SOFT_RESET); 1004 1005 udelay(50); 1006 1007 tmp &= ~srbm_soft_reset; 1008 WREG32(mmSRBM_SOFT_RESET, tmp); 1009 tmp = RREG32(mmSRBM_SOFT_RESET); 1010 1011 /* Wait a little for things to settle down */ 1012 udelay(50); 1013 } 1014 1015 return 0; 1016 } 1017 1018 static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev, 1019 struct amdgpu_irq_src *src, 1020 unsigned type, 1021 enum amdgpu_interrupt_state state) 1022 { 1023 u32 sdma_cntl; 1024 1025 switch (type) { 1026 case AMDGPU_SDMA_IRQ_TRAP0: 1027 switch (state) { 1028 case AMDGPU_IRQ_STATE_DISABLE: 1029 sdma_cntl = 
RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); 1030 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0); 1031 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl); 1032 break; 1033 case AMDGPU_IRQ_STATE_ENABLE: 1034 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); 1035 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1); 1036 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl); 1037 break; 1038 default: 1039 break; 1040 } 1041 break; 1042 case AMDGPU_SDMA_IRQ_TRAP1: 1043 switch (state) { 1044 case AMDGPU_IRQ_STATE_DISABLE: 1045 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); 1046 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0); 1047 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl); 1048 break; 1049 case AMDGPU_IRQ_STATE_ENABLE: 1050 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); 1051 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1); 1052 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl); 1053 break; 1054 default: 1055 break; 1056 } 1057 break; 1058 default: 1059 break; 1060 } 1061 return 0; 1062 } 1063 1064 static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev, 1065 struct amdgpu_irq_src *source, 1066 struct amdgpu_iv_entry *entry) 1067 { 1068 u8 instance_id, queue_id; 1069 1070 instance_id = (entry->ring_id & 0x3) >> 0; 1071 queue_id = (entry->ring_id & 0xc) >> 2; 1072 DRM_DEBUG("IH: SDMA trap\n"); 1073 switch (instance_id) { 1074 case 0: 1075 switch (queue_id) { 1076 case 0: 1077 amdgpu_fence_process(&adev->sdma.instance[0].ring); 1078 break; 1079 case 1: 1080 /* XXX compute */ 1081 break; 1082 case 2: 1083 /* XXX compute */ 1084 break; 1085 } 1086 break; 1087 case 1: 1088 switch (queue_id) { 1089 case 0: 1090 amdgpu_fence_process(&adev->sdma.instance[1].ring); 1091 break; 1092 case 1: 1093 /* XXX compute */ 1094 break; 1095 case 2: 1096 /* XXX compute */ 1097 break; 1098 } 1099 break; 1100 } 1101 return 0; 1102 } 1103 1104 static int 
sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, 1105 struct amdgpu_irq_src *source, 1106 struct amdgpu_iv_entry *entry) 1107 { 1108 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1109 schedule_work(&adev->reset_work); 1110 return 0; 1111 } 1112 1113 static int sdma_v2_4_set_clockgating_state(void *handle, 1114 enum amd_clockgating_state state) 1115 { 1116 /* XXX handled via the smc on VI */ 1117 return 0; 1118 } 1119 1120 static int sdma_v2_4_set_powergating_state(void *handle, 1121 enum amd_powergating_state state) 1122 { 1123 return 0; 1124 } 1125 1126 static const struct amd_ip_funcs sdma_v2_4_ip_funcs = { 1127 .name = "sdma_v2_4", 1128 .early_init = sdma_v2_4_early_init, 1129 .late_init = NULL, 1130 .sw_init = sdma_v2_4_sw_init, 1131 .sw_fini = sdma_v2_4_sw_fini, 1132 .hw_init = sdma_v2_4_hw_init, 1133 .hw_fini = sdma_v2_4_hw_fini, 1134 .suspend = sdma_v2_4_suspend, 1135 .resume = sdma_v2_4_resume, 1136 .is_idle = sdma_v2_4_is_idle, 1137 .wait_for_idle = sdma_v2_4_wait_for_idle, 1138 .soft_reset = sdma_v2_4_soft_reset, 1139 .set_clockgating_state = sdma_v2_4_set_clockgating_state, 1140 .set_powergating_state = sdma_v2_4_set_powergating_state, 1141 }; 1142 1143 static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { 1144 .type = AMDGPU_RING_TYPE_SDMA, 1145 .align_mask = 0xf, 1146 .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 1147 .support_64bit_ptrs = false, 1148 .get_rptr = sdma_v2_4_ring_get_rptr, 1149 .get_wptr = sdma_v2_4_ring_get_wptr, 1150 .set_wptr = sdma_v2_4_ring_set_wptr, 1151 .emit_frame_size = 1152 6 + /* sdma_v2_4_ring_emit_hdp_flush */ 1153 3 + /* hdp invalidate */ 1154 6 + /* sdma_v2_4_ring_emit_pipeline_sync */ 1155 VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */ 1156 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */ 1157 .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */ 1158 .emit_ib = sdma_v2_4_ring_emit_ib, 1159 .emit_fence = sdma_v2_4_ring_emit_fence, 1160 
.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync, 1161 .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, 1162 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, 1163 .test_ring = sdma_v2_4_ring_test_ring, 1164 .test_ib = sdma_v2_4_ring_test_ib, 1165 .insert_nop = sdma_v2_4_ring_insert_nop, 1166 .pad_ib = sdma_v2_4_ring_pad_ib, 1167 .emit_wreg = sdma_v2_4_ring_emit_wreg, 1168 }; 1169 1170 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) 1171 { 1172 int i; 1173 1174 for (i = 0; i < adev->sdma.num_instances; i++) { 1175 adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs; 1176 adev->sdma.instance[i].ring.me = i; 1177 } 1178 } 1179 1180 static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = { 1181 .set = sdma_v2_4_set_trap_irq_state, 1182 .process = sdma_v2_4_process_trap_irq, 1183 }; 1184 1185 static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = { 1186 .process = sdma_v2_4_process_illegal_inst_irq, 1187 }; 1188 1189 static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) 1190 { 1191 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 1192 adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs; 1193 adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs; 1194 } 1195 1196 /** 1197 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine 1198 * 1199 * @ring: amdgpu_ring structure holding ring information 1200 * @src_offset: src GPU address 1201 * @dst_offset: dst GPU address 1202 * @byte_count: number of bytes to xfer 1203 * 1204 * Copy GPU buffers using the DMA engine (VI). 1205 * Used by the amdgpu ttm implementation to move pages if 1206 * registered as the asic copy callback. 
1207 */ 1208 static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, 1209 uint64_t src_offset, 1210 uint64_t dst_offset, 1211 uint32_t byte_count) 1212 { 1213 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1214 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1215 ib->ptr[ib->length_dw++] = byte_count; 1216 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1217 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 1218 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 1219 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1220 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1221 } 1222 1223 /** 1224 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine 1225 * 1226 * @ring: amdgpu_ring structure holding ring information 1227 * @src_data: value to write to buffer 1228 * @dst_offset: dst GPU address 1229 * @byte_count: number of bytes to xfer 1230 * 1231 * Fill GPU buffers using the DMA engine (VI). 1232 */ 1233 static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib, 1234 uint32_t src_data, 1235 uint64_t dst_offset, 1236 uint32_t byte_count) 1237 { 1238 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); 1239 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1240 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1241 ib->ptr[ib->length_dw++] = src_data; 1242 ib->ptr[ib->length_dw++] = byte_count; 1243 } 1244 1245 static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = { 1246 .copy_max_bytes = 0x1fffff, 1247 .copy_num_dw = 7, 1248 .emit_copy_buffer = sdma_v2_4_emit_copy_buffer, 1249 1250 .fill_max_bytes = 0x1fffff, 1251 .fill_num_dw = 7, 1252 .emit_fill_buffer = sdma_v2_4_emit_fill_buffer, 1253 }; 1254 1255 static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) 1256 { 1257 adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; 1258 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 1259 } 1260 1261 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { 
1262 .copy_pte_num_dw = 7, 1263 .copy_pte = sdma_v2_4_vm_copy_pte, 1264 1265 .write_pte = sdma_v2_4_vm_write_pte, 1266 .set_pte_pde = sdma_v2_4_vm_set_pte_pde, 1267 }; 1268 1269 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) 1270 { 1271 struct drm_gpu_scheduler *sched; 1272 unsigned i; 1273 1274 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; 1275 for (i = 0; i < adev->sdma.num_instances; i++) { 1276 sched = &adev->sdma.instance[i].ring.sched; 1277 adev->vm_manager.vm_pte_rqs[i] = 1278 &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 1279 } 1280 adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; 1281 } 1282 1283 const struct amdgpu_ip_block_version sdma_v2_4_ip_block = 1284 { 1285 .type = AMD_IP_BLOCK_TYPE_SDMA, 1286 .major = 2, 1287 .minor = 4, 1288 .rev = 0, 1289 .funcs = &sdma_v2_4_ip_funcs, 1290 }; 1291