/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
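
/* Worked example of the alignment above (inferred from the writes in
 * cayman_dma_ring_ib_execute(), not taken from hardware documentation):
 * the INDIRECT_BUFFER packet is 3 dwords -- the header, the IB base
 * address bits 31:5, and the dword length combined with address bits
 * 39:32.  Padding with NOPs until (wptr & 7) == 5 therefore places those
 * 3 dwords at offsets 5, 6 and 7 of an 8-dword group, so the packet ends
 * exactly on an 8 DW boundary.  Likewise, next_rptr starts past the
 * 4-dword WRITE packet, is rounded up to the same 5-mod-8 position to
 * account for the NOP padding, and then has 3 added, i.e. it is the ring
 * read pointer value once the IB packet itself has been fetched.
 */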

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
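
/* Worked sizing example (illustrative values, not from the original code):
 * ring->ring_size is in bytes, so a 64 KB ring buffer is 65536 / 4 = 16384
 * dwords and order_base_2(16384) = 14.  cayman_dma_resume() then programs
 * DMA_RB_CNTL with that log2 dword count shifted left by one, matching
 * "rb_bufsz << 1" above; the read-pointer writeback and byte-swap bits are
 * OR'd in on top before the ring is finally enabled with DMA_RB_ENABLE.
 */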

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_set_page - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_page(struct radeon_device *rdev,
			    struct radeon_ib *ib,
			    uint64_t pe,
			    uint64_t addr, unsigned count,
			    uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_SYSTEM) || (count == 1)) {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			/* for non-physically contiguous pages (system) */
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
				if (flags & R600_PTE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & R600_PTE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			if (flags & R600_PTE_VALID)
				value = addr;
			else
				value = 0;
			/* for physically contiguous pages (vram) */
			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
			ib->ptr[ib->length_dw++] = pe; /* dst addr */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			ib->ptr[ib->length_dw++] = flags; /* mask */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = value; /* value */
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
			ib->ptr[ib->length_dw++] = incr; /* increment size */
			ib->ptr[ib->length_dw++] = 0;
			pe += ndw * 4;
			addr += (ndw / 2) * incr;
			count -= ndw / 2;
		}
	}
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
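
/* Worked example for the contiguous (vram) path above, using illustrative
 * numbers that are not taken from the original code: with count = 512
 * page-table entries and incr = 4096, ndw = 1024 (well under the 0xFFFFE
 * cap), so a single 9-dword DMA_PTE_PDE_PACKET covers all 512 entries.
 * The loop then advances pe by ndw * 4 = 4096 bytes (512 entries of 8
 * bytes each) and addr by (ndw / 2) * incr = 512 * 4096.  The trailing
 * NOP loop pads the IB to a multiple of 8 dwords, mirroring the ring
 * alignment rule in cayman_dma_ring_ib_execute().
 */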

/**
 * cayman_dma_vm_flush - flush the VM TLB via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to use
 * @vm: radeon_vm pointer, or NULL
 *
 * Update the page table base address for @vm and request a TLB
 * invalidation for its VM context using SRBM writes on the DMA
 * ring (cayman/TN).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
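
/* Note on the SRBM_WRITE packets above (field meaning inferred from the
 * code, not from register documentation): each write is 3 dwords -- the
 * packet header, a dword combining 0xf << 16 (presumably a byte-enable
 * mask) with the destination register's dword offset (reg >> 2), and the
 * 32-bit value to write.  This lets the DMA engine poke configuration
 * registers such as VM_INVALIDATE_REQUEST without involving the CP.
 */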