1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Alex Deucher 23 */ 24 #include <drm/drmP.h> 25 #include "amdgpu.h" 26 #include "amdgpu_trace.h" 27 #include "si/sid.h" 28 29 const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 30 { 31 DMA0_REGISTER_OFFSET, 32 DMA1_REGISTER_OFFSET 33 }; 34 35 static void si_dma_set_ring_funcs(struct amdgpu_device *adev); 36 static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); 37 static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); 38 static void si_dma_set_irq_funcs(struct amdgpu_device *adev); 39 40 static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) 41 { 42 return ring->adev->wb.wb[ring->rptr_offs>>2]; 43 } 44 45 static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) 46 { 47 struct amdgpu_device *adev = ring->adev; 48 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 49 50 return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; 51 } 52 53 static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) 54 { 55 struct amdgpu_device *adev = ring->adev; 56 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 57 58 WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); 59 } 60 61 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, 62 struct amdgpu_ib *ib, 63 unsigned vm_id, bool ctx_switch) 64 { 65 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 66 * Pad as necessary with NOPs. 67 */ 68 while ((ring->wptr & 7) != 5) 69 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); 70 amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); 71 amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 72 amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 73 74 } 75 76 static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring) 77 { 78 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 79 amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL)); 80 amdgpu_ring_write(ring, 1); 81 } 82 83 static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 84 { 85 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 86 amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0)); 87 amdgpu_ring_write(ring, 1); 88 } 89 90 /** 91 * si_dma_ring_emit_fence - emit a fence on the DMA ring 92 * 93 * @ring: amdgpu ring pointer 94 * @fence: amdgpu fence object 95 * 96 * Add a DMA fence packet to the ring to write 97 * the fence seq number and DMA trap packet to generate 98 * an interrupt if needed (VI). 99 */ 100 static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 101 unsigned flags) 102 { 103 104 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 105 /* write the fence */ 106 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 107 amdgpu_ring_write(ring, addr & 0xfffffffc); 108 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 109 amdgpu_ring_write(ring, seq); 110 /* optionally write high bits as well */ 111 if (write64bit) { 112 addr += 4; 113 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 114 amdgpu_ring_write(ring, addr & 0xfffffffc); 115 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 116 amdgpu_ring_write(ring, upper_32_bits(seq)); 117 } 118 /* generate an interrupt */ 119 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); 120 } 121 122 static void si_dma_stop(struct amdgpu_device *adev) 123 { 124 struct amdgpu_ring *ring; 125 u32 rb_cntl; 126 unsigned i; 127 128 for (i = 0; i < adev->sdma.num_instances; i++) { 129 ring = &adev->sdma.instance[i].ring; 130 /* dma0 */ 131 rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); 132 rb_cntl &= ~DMA_RB_ENABLE; 133 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); 134 135 if (adev->mman.buffer_funcs_ring == ring) 136 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 137 ring->ready = false; 138 } 139 } 140 141 static int si_dma_start(struct amdgpu_device *adev) 142 { 143 struct amdgpu_ring *ring; 144 u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz; 145 int i, r; 146 uint64_t rptr_addr; 147 148 for (i = 0; i < adev->sdma.num_instances; i++) { 149 ring = &adev->sdma.instance[i].ring; 150 151 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); 152 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 153 154 /* Set ring buffer size in dwords */ 155 rb_bufsz = order_base_2(ring->ring_size / 4); 156 rb_cntl = rb_bufsz << 1; 157 #ifdef __BIG_ENDIAN 158 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; 159 #endif 160 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); 161 162 /* Initialize the ring buffer's read and write pointers */ 163 WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); 164 WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); 165 166 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 167 168 WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); 169 WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); 170 171 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; 172 173 WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 174 175 /* enable DMA IBs */ 176 ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; 177 #ifdef __BIG_ENDIAN 178 ib_cntl |= DMA_IB_SWAP_ENABLE; 179 #endif 180 WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); 181 182 dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); 183 dma_cntl &= ~CTXEMPTY_INT_ENABLE; 184 WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); 185 186 ring->wptr = 0; 187 WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); 188 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); 189 190 ring->ready = true; 191 192 r = amdgpu_ring_test_ring(ring); 193 if (r) { 194 ring->ready = false; 195 return r; 196 } 197 198 if (adev->mman.buffer_funcs_ring == ring) 199 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 200 } 201 202 return 0; 203 } 204 205 /** 206 * si_dma_ring_test_ring - simple async dma engine test 207 * 208 * @ring: amdgpu_ring structure holding ring information 209 * 210 * Test the DMA engine by writing using it to write an 211 * value to memory. (VI). 212 * Returns 0 for success, error for failure. 213 */ 214 static int si_dma_ring_test_ring(struct amdgpu_ring *ring) 215 { 216 struct amdgpu_device *adev = ring->adev; 217 unsigned i; 218 unsigned index; 219 int r; 220 u32 tmp; 221 u64 gpu_addr; 222 223 r = amdgpu_wb_get(adev, &index); 224 if (r) { 225 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 226 return r; 227 } 228 229 gpu_addr = adev->wb.gpu_addr + (index * 4); 230 tmp = 0xCAFEDEAD; 231 adev->wb.wb[index] = cpu_to_le32(tmp); 232 233 r = amdgpu_ring_alloc(ring, 4); 234 if (r) { 235 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 236 amdgpu_wb_free(adev, index); 237 return r; 238 } 239 240 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); 241 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 242 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); 243 amdgpu_ring_write(ring, 0xDEADBEEF); 244 amdgpu_ring_commit(ring); 245 246 for (i = 0; i < adev->usec_timeout; i++) { 247 tmp = le32_to_cpu(adev->wb.wb[index]); 248 if (tmp == 0xDEADBEEF) 249 break; 250 DRM_UDELAY(1); 251 } 252 253 if (i < adev->usec_timeout) { 254 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 255 } else { 256 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", 257 ring->idx, tmp); 258 r = -EINVAL; 259 } 260 amdgpu_wb_free(adev, index); 261 262 return r; 263 } 264 265 /** 266 * si_dma_ring_test_ib - test an IB on the DMA engine 267 * 268 * @ring: amdgpu_ring structure holding ring information 269 * 270 * Test a simple IB in the DMA ring (VI). 271 * Returns 0 on success, error on failure. 272 */ 273 static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) 274 { 275 struct amdgpu_device *adev = ring->adev; 276 struct amdgpu_ib ib; 277 struct fence *f = NULL; 278 unsigned index; 279 u32 tmp = 0; 280 u64 gpu_addr; 281 long r; 282 283 r = amdgpu_wb_get(adev, &index); 284 if (r) { 285 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 286 return r; 287 } 288 289 gpu_addr = adev->wb.gpu_addr + (index * 4); 290 tmp = 0xCAFEDEAD; 291 adev->wb.wb[index] = cpu_to_le32(tmp); 292 memset(&ib, 0, sizeof(ib)); 293 r = amdgpu_ib_get(adev, NULL, 256, &ib); 294 if (r) { 295 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 296 goto err0; 297 } 298 299 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); 300 ib.ptr[1] = lower_32_bits(gpu_addr); 301 ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; 302 ib.ptr[3] = 0xDEADBEEF; 303 ib.length_dw = 4; 304 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 305 if (r) 306 goto err1; 307 308 r = fence_wait_timeout(f, false, timeout); 309 if (r == 0) { 310 DRM_ERROR("amdgpu: IB test timed out\n"); 311 r = -ETIMEDOUT; 312 goto err1; 313 } else if (r < 0) { 314 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 315 goto err1; 316 } 317 tmp = le32_to_cpu(adev->wb.wb[index]); 318 if (tmp == 0xDEADBEEF) { 319 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 320 r = 0; 321 } else { 322 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 323 r = -EINVAL; 324 } 325 326 err1: 327 amdgpu_ib_free(adev, &ib, NULL); 328 fence_put(f); 329 err0: 330 amdgpu_wb_free(adev, index); 331 return r; 332 } 333 334 /** 335 * cik_dma_vm_copy_pte - update PTEs by copying them from the GART 336 * 337 * @ib: indirect buffer to fill with commands 338 * @pe: addr of the page entry 339 * @src: src addr to copy from 340 * @count: number of page entries to update 341 * 342 * Update PTEs by copying them from the GART using DMA (SI). 343 */ 344 static void si_dma_vm_copy_pte(struct amdgpu_ib *ib, 345 uint64_t pe, uint64_t src, 346 unsigned count) 347 { 348 unsigned bytes = count * 8; 349 350 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 351 1, 0, 0, bytes); 352 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 353 ib->ptr[ib->length_dw++] = lower_32_bits(src); 354 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 355 ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 356 } 357 358 /** 359 * si_dma_vm_write_pte - update PTEs by writing them manually 360 * 361 * @ib: indirect buffer to fill with commands 362 * @pe: addr of the page entry 363 * @value: dst addr to write into pe 364 * @count: number of page entries to update 365 * @incr: increase next addr by incr bytes 366 * 367 * Update PTEs by writing them manually using DMA (SI). 368 */ 369 static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 370 uint64_t value, unsigned count, 371 uint32_t incr) 372 { 373 unsigned ndw = count * 2; 374 375 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 376 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 377 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 378 for (; ndw > 0; ndw -= 2) { 379 ib->ptr[ib->length_dw++] = lower_32_bits(value); 380 ib->ptr[ib->length_dw++] = upper_32_bits(value); 381 value += incr; 382 } 383 } 384 385 /** 386 * si_dma_vm_set_pte_pde - update the page tables using sDMA 387 * 388 * @ib: indirect buffer to fill with commands 389 * @pe: addr of the page entry 390 * @addr: dst addr to write into pe 391 * @count: number of page entries to update 392 * @incr: increase next addr by incr bytes 393 * @flags: access flags 394 * 395 * Update the page tables using sDMA (CIK). 396 */ 397 static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, 398 uint64_t pe, 399 uint64_t addr, unsigned count, 400 uint32_t incr, uint32_t flags) 401 { 402 uint64_t value; 403 unsigned ndw; 404 405 while (count) { 406 ndw = count * 2; 407 if (ndw > 0xFFFFE) 408 ndw = 0xFFFFE; 409 410 if (flags & AMDGPU_PTE_VALID) 411 value = addr; 412 else 413 value = 0; 414 415 /* for physically contiguous pages (vram) */ 416 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 417 ib->ptr[ib->length_dw++] = pe; /* dst addr */ 418 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 419 ib->ptr[ib->length_dw++] = flags; /* mask */ 420 ib->ptr[ib->length_dw++] = 0; 421 ib->ptr[ib->length_dw++] = value; /* value */ 422 ib->ptr[ib->length_dw++] = upper_32_bits(value); 423 ib->ptr[ib->length_dw++] = incr; /* increment size */ 424 ib->ptr[ib->length_dw++] = 0; 425 pe += ndw * 4; 426 addr += (ndw / 2) * incr; 427 count -= ndw / 2; 428 } 429 } 430 431 /** 432 * si_dma_pad_ib - pad the IB to the required number of dw 433 * 434 * @ib: indirect buffer to fill with padding 435 * 436 */ 437 static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 438 { 439 while (ib->length_dw & 0x7) 440 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); 441 } 442 443 /** 444 * cik_sdma_ring_emit_pipeline_sync - sync the pipeline 445 * 446 * @ring: amdgpu_ring pointer 447 * 448 * Make sure all previous operations are completed (CIK). 449 */ 450 static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 451 { 452 uint32_t seq = ring->fence_drv.sync_seq; 453 uint64_t addr = ring->fence_drv.gpu_addr; 454 455 /* wait for idle */ 456 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | 457 (1 << 27)); /* Poll memory */ 458 amdgpu_ring_write(ring, lower_32_bits(addr)); 459 amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ 460 amdgpu_ring_write(ring, 0xffffffff); /* mask */ 461 amdgpu_ring_write(ring, seq); /* value */ 462 amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ 463 } 464 465 /** 466 * si_dma_ring_emit_vm_flush - cik vm flush using sDMA 467 * 468 * @ring: amdgpu_ring pointer 469 * @vm: amdgpu_vm pointer 470 * 471 * Update the page table base and flush the VM TLB 472 * using sDMA (VI). 473 */ 474 static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, 475 unsigned vm_id, uint64_t pd_addr) 476 { 477 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 478 if (vm_id < 8) 479 amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 480 else 481 amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); 482 amdgpu_ring_write(ring, pd_addr >> 12); 483 484 /* bits 0-7 are the VM contexts0-7 */ 485 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 486 amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST)); 487 amdgpu_ring_write(ring, 1 << vm_id); 488 489 /* wait for invalidate to complete */ 490 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); 491 amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); 492 amdgpu_ring_write(ring, 0xff << 16); /* retry */ 493 amdgpu_ring_write(ring, 1 << vm_id); /* mask */ 494 amdgpu_ring_write(ring, 0); /* value */ 495 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ 496 } 497 498 static unsigned si_dma_ring_get_emit_ib_size(struct amdgpu_ring *ring) 499 { 500 return 501 7 + 3; /* si_dma_ring_emit_ib */ 502 } 503 504 static unsigned si_dma_ring_get_dma_frame_size(struct amdgpu_ring *ring) 505 { 506 return 507 3 + /* si_dma_ring_emit_hdp_flush */ 508 3 + /* si_dma_ring_emit_hdp_invalidate */ 509 6 + /* si_dma_ring_emit_pipeline_sync */ 510 12 + /* si_dma_ring_emit_vm_flush */ 511 9 + 9 + 9; /* si_dma_ring_emit_fence x3 for user fence, vm fence */ 512 } 513 514 static int si_dma_early_init(void *handle) 515 { 516 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 517 518 adev->sdma.num_instances = 2; 519 520 si_dma_set_ring_funcs(adev); 521 si_dma_set_buffer_funcs(adev); 522 si_dma_set_vm_pte_funcs(adev); 523 si_dma_set_irq_funcs(adev); 524 525 return 0; 526 } 527 528 static int si_dma_sw_init(void *handle) 529 { 530 struct amdgpu_ring *ring; 531 int r, i; 532 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 533 534 /* DMA0 trap event */ 535 r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); 536 if (r) 537 return r; 538 539 /* DMA1 trap event */ 540 r = amdgpu_irq_add_id(adev, 244, &adev->sdma.trap_irq_1); 541 if (r) 542 return r; 543 544 for (i = 0; i < adev->sdma.num_instances; i++) { 545 ring = &adev->sdma.instance[i].ring; 546 ring->ring_obj = NULL; 547 ring->use_doorbell = false; 548 sprintf(ring->name, "sdma%d", i); 549 r = amdgpu_ring_init(adev, ring, 1024, 550 DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), 0xf, 551 &adev->sdma.trap_irq, 552 (i == 0) ? 553 AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1, 554 AMDGPU_RING_TYPE_SDMA); 555 if (r) 556 return r; 557 } 558 559 return r; 560 } 561 562 static int si_dma_sw_fini(void *handle) 563 { 564 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 565 int i; 566 567 for (i = 0; i < adev->sdma.num_instances; i++) 568 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 569 570 return 0; 571 } 572 573 static int si_dma_hw_init(void *handle) 574 { 575 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 576 577 return si_dma_start(adev); 578 } 579 580 static int si_dma_hw_fini(void *handle) 581 { 582 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 583 584 si_dma_stop(adev); 585 586 return 0; 587 } 588 589 static int si_dma_suspend(void *handle) 590 { 591 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 592 593 return si_dma_hw_fini(adev); 594 } 595 596 static int si_dma_resume(void *handle) 597 { 598 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 599 600 return si_dma_hw_init(adev); 601 } 602 603 static bool si_dma_is_idle(void *handle) 604 { 605 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 606 u32 tmp = RREG32(SRBM_STATUS2); 607 608 if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) 609 return false; 610 611 return true; 612 } 613 614 static int si_dma_wait_for_idle(void *handle) 615 { 616 unsigned i; 617 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 618 619 for (i = 0; i < adev->usec_timeout; i++) { 620 if (si_dma_is_idle(handle)) 621 return 0; 622 udelay(1); 623 } 624 return -ETIMEDOUT; 625 } 626 627 static int si_dma_soft_reset(void *handle) 628 { 629 DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); 630 return 0; 631 } 632 633 static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, 634 struct amdgpu_irq_src *src, 635 unsigned type, 636 enum amdgpu_interrupt_state state) 637 { 638 u32 sdma_cntl; 639 640 switch (type) { 641 case AMDGPU_SDMA_IRQ_TRAP0: 642 switch (state) { 643 case AMDGPU_IRQ_STATE_DISABLE: 644 sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); 645 sdma_cntl &= ~TRAP_ENABLE; 646 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 647 break; 648 case AMDGPU_IRQ_STATE_ENABLE: 649 sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); 650 sdma_cntl |= TRAP_ENABLE; 651 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 652 break; 653 default: 654 break; 655 } 656 break; 657 case AMDGPU_SDMA_IRQ_TRAP1: 658 switch (state) { 659 case AMDGPU_IRQ_STATE_DISABLE: 660 sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); 661 sdma_cntl &= ~TRAP_ENABLE; 662 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 663 break; 664 case AMDGPU_IRQ_STATE_ENABLE: 665 sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); 666 sdma_cntl |= TRAP_ENABLE; 667 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 668 break; 669 default: 670 break; 671 } 672 break; 673 default: 674 break; 675 } 676 return 0; 677 } 678 679 static int si_dma_process_trap_irq(struct amdgpu_device *adev, 680 struct amdgpu_irq_src *source, 681 struct amdgpu_iv_entry *entry) 682 { 683 amdgpu_fence_process(&adev->sdma.instance[0].ring); 684 685 return 0; 686 } 687 688 static int si_dma_process_trap_irq_1(struct amdgpu_device *adev, 689 struct amdgpu_irq_src *source, 690 struct amdgpu_iv_entry *entry) 691 { 692 amdgpu_fence_process(&adev->sdma.instance[1].ring); 693 694 return 0; 695 } 696 697 static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev, 698 struct amdgpu_irq_src *source, 699 struct amdgpu_iv_entry *entry) 700 { 701 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 702 schedule_work(&adev->reset_work); 703 return 0; 704 } 705 706 static int si_dma_set_clockgating_state(void *handle, 707 enum amd_clockgating_state state) 708 { 709 u32 orig, data, offset; 710 int i; 711 bool enable; 712 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 713 714 enable = (state == AMD_CG_STATE_GATE) ? true : false; 715 716 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 717 for (i = 0; i < adev->sdma.num_instances; i++) { 718 if (i == 0) 719 offset = DMA0_REGISTER_OFFSET; 720 else 721 offset = DMA1_REGISTER_OFFSET; 722 orig = data = RREG32(DMA_POWER_CNTL + offset); 723 data &= ~MEM_POWER_OVERRIDE; 724 if (data != orig) 725 WREG32(DMA_POWER_CNTL + offset, data); 726 WREG32(DMA_CLK_CTRL + offset, 0x00000100); 727 } 728 } else { 729 for (i = 0; i < adev->sdma.num_instances; i++) { 730 if (i == 0) 731 offset = DMA0_REGISTER_OFFSET; 732 else 733 offset = DMA1_REGISTER_OFFSET; 734 orig = data = RREG32(DMA_POWER_CNTL + offset); 735 data |= MEM_POWER_OVERRIDE; 736 if (data != orig) 737 WREG32(DMA_POWER_CNTL + offset, data); 738 739 orig = data = RREG32(DMA_CLK_CTRL + offset); 740 data = 0xff000000; 741 if (data != orig) 742 WREG32(DMA_CLK_CTRL + offset, data); 743 } 744 } 745 746 return 0; 747 } 748 749 static int si_dma_set_powergating_state(void *handle, 750 enum amd_powergating_state state) 751 { 752 u32 tmp; 753 754 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 755 756 WREG32(DMA_PGFSM_WRITE, 0x00002000); 757 WREG32(DMA_PGFSM_CONFIG, 0x100010ff); 758 759 for (tmp = 0; tmp < 5; tmp++) 760 WREG32(DMA_PGFSM_WRITE, 0); 761 762 return 0; 763 } 764 765 const struct amd_ip_funcs si_dma_ip_funcs = { 766 .name = "si_dma", 767 .early_init = si_dma_early_init, 768 .late_init = NULL, 769 .sw_init = si_dma_sw_init, 770 .sw_fini = si_dma_sw_fini, 771 .hw_init = si_dma_hw_init, 772 .hw_fini = si_dma_hw_fini, 773 .suspend = si_dma_suspend, 774 .resume = si_dma_resume, 775 .is_idle = si_dma_is_idle, 776 .wait_for_idle = si_dma_wait_for_idle, 777 .soft_reset = si_dma_soft_reset, 778 .set_clockgating_state = si_dma_set_clockgating_state, 779 .set_powergating_state = si_dma_set_powergating_state, 780 }; 781 782 static const struct amdgpu_ring_funcs si_dma_ring_funcs = { 783 .get_rptr = si_dma_ring_get_rptr, 784 .get_wptr = si_dma_ring_get_wptr, 785 .set_wptr = si_dma_ring_set_wptr, 786 .parse_cs = NULL, 787 .emit_ib = si_dma_ring_emit_ib, 788 .emit_fence = si_dma_ring_emit_fence, 789 .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, 790 .emit_vm_flush = si_dma_ring_emit_vm_flush, 791 .emit_hdp_flush = si_dma_ring_emit_hdp_flush, 792 .emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate, 793 .test_ring = si_dma_ring_test_ring, 794 .test_ib = si_dma_ring_test_ib, 795 .insert_nop = amdgpu_ring_insert_nop, 796 .pad_ib = si_dma_ring_pad_ib, 797 .get_emit_ib_size = si_dma_ring_get_emit_ib_size, 798 .get_dma_frame_size = si_dma_ring_get_dma_frame_size, 799 }; 800 801 static void si_dma_set_ring_funcs(struct amdgpu_device *adev) 802 { 803 int i; 804 805 for (i = 0; i < adev->sdma.num_instances; i++) 806 adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs; 807 } 808 809 static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { 810 .set = si_dma_set_trap_irq_state, 811 .process = si_dma_process_trap_irq, 812 }; 813 814 static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = { 815 .set = si_dma_set_trap_irq_state, 816 .process = si_dma_process_trap_irq_1, 817 }; 818 819 static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = { 820 .process = si_dma_process_illegal_inst_irq, 821 }; 822 823 static void si_dma_set_irq_funcs(struct amdgpu_device *adev) 824 { 825 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 826 adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; 827 adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1; 828 adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs; 829 } 830 831 /** 832 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine 833 * 834 * @ring: amdgpu_ring structure holding ring information 835 * @src_offset: src GPU address 836 * @dst_offset: dst GPU address 837 * @byte_count: number of bytes to xfer 838 * 839 * Copy GPU buffers using the DMA engine (VI). 840 * Used by the amdgpu ttm implementation to move pages if 841 * registered as the asic copy callback. 842 */ 843 static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, 844 uint64_t src_offset, 845 uint64_t dst_offset, 846 uint32_t byte_count) 847 { 848 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 849 1, 0, 0, byte_count); 850 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 851 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 852 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; 853 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; 854 } 855 856 /** 857 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine 858 * 859 * @ring: amdgpu_ring structure holding ring information 860 * @src_data: value to write to buffer 861 * @dst_offset: dst GPU address 862 * @byte_count: number of bytes to xfer 863 * 864 * Fill GPU buffers using the DMA engine (VI). 865 */ 866 static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib, 867 uint32_t src_data, 868 uint64_t dst_offset, 869 uint32_t byte_count) 870 { 871 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 872 0, 0, 0, byte_count / 4); 873 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 874 ib->ptr[ib->length_dw++] = src_data; 875 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; 876 } 877 878 879 static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { 880 .copy_max_bytes = 0xffff8, 881 .copy_num_dw = 5, 882 .emit_copy_buffer = si_dma_emit_copy_buffer, 883 884 .fill_max_bytes = 0xffff8, 885 .fill_num_dw = 4, 886 .emit_fill_buffer = si_dma_emit_fill_buffer, 887 }; 888 889 static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) 890 { 891 if (adev->mman.buffer_funcs == NULL) { 892 adev->mman.buffer_funcs = &si_dma_buffer_funcs; 893 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 894 } 895 } 896 897 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { 898 .copy_pte = si_dma_vm_copy_pte, 899 .write_pte = si_dma_vm_write_pte, 900 .set_pte_pde = si_dma_vm_set_pte_pde, 901 }; 902 903 static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) 904 { 905 unsigned i; 906 907 if (adev->vm_manager.vm_pte_funcs == NULL) { 908 adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; 909 for (i = 0; i < adev->sdma.num_instances; i++) 910 adev->vm_manager.vm_pte_rings[i] = 911 &adev->sdma.instance[i].ring; 912 913 adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; 914 } 915 } 916