/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si/sid.h"

const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);

static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs >> 2];
}

static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}

static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
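/*
 * Note: the INDIRECT_BUFFER packet emitted below is 3 dwords (header,
 * IB base low, IB size | base high), so padding the write pointer up to
 * 5 mod 8 with NOPs makes the packet end exactly on an 8 dword boundary.
 */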
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_ib *ib,
				unsigned vm_id, bool ctx_switch)
{
	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL));
	amdgpu_ring_write(ring, 1);
}

static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0));
	amdgpu_ring_write(ring, 1);
}

/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (SI).
 */
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}

static void si_dma_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		/* stop the ring buffer */
		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
		ring->ready = false;
	}
}

static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
		WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);

		rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);

		WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

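		/*
		 * With read pointer write-back enabled, the engine mirrors
		 * DMA_RB_RPTR into the wb slot programmed above, which is
		 * what si_dma_ring_get_rptr() reads instead of an MMIO read.
		 */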
		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
	}

	return 0;
}

/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (SI).
 * Returns 0 for success, error for failure.
 */
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_wb_free(adev, index);

	return r;
}
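/*
 * Both the ring test above and the IB test below follow the same
 * pattern: grab a scratch dword in the write-back page, seed it with
 * 0xCAFEDEAD, have the engine overwrite it with 0xDEADBEEF via a
 * DMA_PACKET_WRITE, then poll (or wait on the fence) until the value
 * shows up or we time out.
 */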
/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: how long to wait for the IB to complete
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_wb_free(adev, index);
	return r;
}

/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using DMA (SI).
 */
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}
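/*
 * Layout note: each PTE is 8 bytes. The WRITE variant below emits a
 * 3 dword header (packet, dst lo, dst hi) followed by count * 2 data
 * dwords, writing the 64-bit PTE values inline rather than copying
 * them from the GART as si_dma_vm_copy_pte() does.
 */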
/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using DMA (SI).
 */
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * si_dma_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (SI).
 */
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}

/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}
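/*
 * The VM flush below is built entirely from SRBM register writes: it
 * programs the page directory base (page aligned, hence pd_addr >> 12)
 * into the per-VMID page table base register, requests a TLB invalidate
 * for that VMID, then polls VM_INVALIDATE_REQUEST until the bit clears.
 */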
/**
 * si_dma_ring_emit_vm_flush - si vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VMID to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (SI).
 */
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm_id < 8)
		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	else
		amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8)));
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-7 are the VM contexts0-7 */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST));
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}

static int si_dma_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = 2;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	si_dma_set_vm_pte_funcs(adev);
	si_dma_set_irq_funcs(adev);

	return 0;
}

static int si_dma_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, 244, &adev->sdma.trap_irq_1);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ? AMDGPU_SDMA_IRQ_TRAP0 :
						AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}

static int si_dma_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int si_dma_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_start(adev);
}

static int si_dma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	si_dma_stop(adev);

	return 0;
}

static int si_dma_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_fini(adev);
}

static int si_dma_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_init(adev);
}
static bool si_dma_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(SRBM_STATUS2);

	if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
		return false;

	return true;
}

static int si_dma_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int si_dma_soft_reset(void *handle)
{
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}

static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	amdgpu_fence_process(&adev->sdma.instance[0].ring);

	return 0;
}

static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	amdgpu_fence_process(&adev->sdma.instance[1].ring);

	return 0;
}

static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
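/*
 * Clock gating note (inferred from the register writes below, not from
 * documentation): with SDMA MGCG enabled, MEM_POWER_OVERRIDE is cleared
 * and DMA_CLK_CTRL is programmed to 0x00000100; otherwise the override
 * is forced on and DMA_CLK_CTRL is written with 0xff000000.
 */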
static int si_dma_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	enable = (state == AMD_CG_STATE_GATE) ? true : false;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}

static int si_dma_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	u32 tmp;

	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	WREG32(DMA_PGFSM_WRITE, 0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);

	return 0;
}

static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.late_init = NULL,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};

static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + /* si_dma_ring_emit_hdp_flush */
		3 + /* si_dma_ring_emit_hdp_invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		12 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.emit_hdp_flush = si_dma_ring_emit_hdp_flush,
	.emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}
static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq_1,
};

static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
	.process = si_dma_process_illegal_inst_irq,
};

static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
	adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
	adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
}

/**
 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}
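/*
 * The CONSTANT_FILL packet below takes its count in dwords
 * (byte_count / 4) and carries the upper destination address bits in
 * the high half of its final dword, hence the << 16.
 */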
/**
 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}

static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};

static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &si_dma_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte = si_dma_vm_copy_pte,
	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};

static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
		for (i = 0; i < adev->sdma.num_instances; i++)
			adev->vm_manager.vm_pte_rings[i] =
				&adev->sdma.instance[i].ring;

		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
	}
}

const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};