/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}
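/*
 * Note: the DMA_RB_RPTR/DMA_RB_WPTR registers hold byte offsets into the
 * ring, while the driver tracks the ring position in dwords. The accessors
 * above and below therefore mask to a dword-aligned byte offset (0x3fffc)
 * and shift by two to convert between the two representations; e.g. a wptr
 * of 0x20 dwords is presumably programmed as byte offset 0x80.
 */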
/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

	if (rdev->wb.enabled) {
		/* Predict where the rptr will land after this submission:
		 * the 4 dw WRITE packet emitted here, the NOP padding up to
		 * (wptr & 7) == 5, and the 3 dw INDIRECT_BUFFER packet.
		 */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}
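/*
 * The two DMA engines share the same register layout; DMA0_REGISTER_OFFSET
 * and DMA1_REGISTER_OFFSET simply select which instance a register access
 * hits. The resume path below therefore runs the same bring-up sequence for
 * both engines in a loop: program the ring buffer size and base, reset the
 * read/write pointers, point rptr writeback at the wb buffer, enable IBs,
 * and finally enable the ring and test it.
 */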
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
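/*
 * Lockup detection piggybacks on the GPU soft-reset status: if the soft
 * reset logic does not flag the engine that owns this ring, the ring is
 * treated as healthy and the lockup tracking is simply refreshed; otherwise
 * the generic ring lockup test decides whether a reset is warranted.
 */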
/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
			      struct radeon_ib *ib,
			      uint64_t pe, uint64_t src,
			      unsigned count)
{
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += ndw * 4;
		src += ndw * 4;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}
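/*
 * For runs of identically-flagged, linearly increasing mappings the PTE_PDE
 * packet below is used instead of a plain WRITE packet: it appears to carry
 * a destination, a flags mask, a base value, an address increment and an
 * entry count (the ndw value encoded in the header), so a whole batch of
 * page table entries costs nine dwords of IB space rather than two dwords
 * per entry as in cayman_dma_vm_write_pages() above.
 */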
/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its length is a multiple of 8 dwords (cayman/TN).
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

/**
 * cayman_dma_vm_flush - flush the VM TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: vm id to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base for the requested VM and flush
 * the TLB using SRBM writes on the DMA ring (cayman/TN).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}