/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "vi.h"
#include "vid.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "tonga_sdma_pkt_open.h"

static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("amdgpu/tonga_sdma.bin");
MODULE_FIRMWARE("amdgpu/tonga_sdma1.bin");
MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin");
MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
MODULE_FIRMWARE("amdgpu/fiji_sdma.bin");
MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin");
MODULE_FIRMWARE("amdgpu/stoney_sdma.bin");

static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	SDMA0_REGISTER_OFFSET,
	SDMA1_REGISTER_OFFSET
};

/*
 * Golden register tables: register/mask/value triplets consumed by
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 golden_settings_fiji_a10[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 cz_golden_settings_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA1_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 stoney_golden_settings_a11[] =
{
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xffffffff, 0x00000100,
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */

static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		break;
	default:
		break;
	}
}

/**
 * sdma_v3_0_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
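 * The firmware is looked up as amdgpu/<chip>_sdma.bin for the first
 * instance and amdgpu/<chip>_sdma1.bin for the second one.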
 * Returns 0 on success, error on failure.
 */
static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default: BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;

		if (adev->firmware.smu_load) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}
out:
	if (err) {
		printk(KERN_ERR
		       "sdma_v3_0: Failed to load firmware \"%s\"\n",
		       fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}

/**
 * sdma_v3_0_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VI+).
 */
static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	u32 rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;

	return rptr;
}

/**
 * sdma_v3_0_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
	} else {
		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;

		wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
	}

	return wptr;
}

/**
 * sdma_v3_0_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VI+).
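 * With doorbells enabled the wptr is mirrored to the writeback slot and
 * rung through the doorbell; otherwise it is written to SDMA0_GFX_RB_WPTR
 * via MMIO.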
 */
static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr << 2;
		WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
	} else {
		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;

		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
	}
}

static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->nop);
}

/**
 * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib)
{
	u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
	u32 next_rptr = ring->wptr + 5;

	while ((next_rptr & 7) != 2)
		next_rptr++;
	next_rptr += 6;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, next_rptr);

	/* IB packet must end on an 8 DW boundary */
	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}

/**
 * sdma_v3_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
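 * The flush is built as a POLL_REGMEM packet with the HDP_FLUSH bit set,
 * which pokes GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the
 * bit for this SDMA instance is set.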
 */
static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	if (ring == &ring->adev->sdma.instance[0].ring)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

/**
 * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @fence: amdgpu fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @semaphore: amdgpu semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (VI).
 */
static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
					  struct amdgpu_semaphore *semaphore,
					  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 sig = emit_wait ? 0 : 1;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
			  SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
	amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
	amdgpu_ring_write(ring, upper_32_bits(addr));

	return true;
}

/**
 * sdma_v3_0_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
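 * Clears the RB_ENABLE and IB_ENABLE bits for each instance and, if one of
 * the rings was the active buffer-funcs ring, restores the visible VRAM
 * size used by TTM.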
 */
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v3_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v3_0_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch (VI).
 */
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 1);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 0);
		WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_enable - halt or unhalt the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v3_0_gfx_stop(adev);
		sdma_v3_0_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	u32 doorbell;
	int i, j, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			/* SDMA GFX */
			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);

		doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]);

		if (ring->use_doorbell) {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL,
						 OFFSET, ring->doorbell_index);
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
		} else {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
		}
		WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		ring->ready = true;

		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
	}

	return 0;
}

/**
 * sdma_v3_0_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}

/**
 * sdma_v3_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v3_0_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
		for (j = 0; j < fw_size; j++)
			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v3_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_start(struct amdgpu_device *adev)
{
	int r, i;

	if (!adev->firmware.smu_load) {
		r = sdma_v3_0_load_microcode(adev);
		if (r)
			return r;
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
									 (i == 0) ?
									 AMDGPU_UCODE_ID_SDMA0 :
									 AMDGPU_UCODE_ID_SDMA1);
			if (r)
				return -EINVAL;
		}
	}

	/* unhalt the MEs */
	sdma_v3_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v3_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v3_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v3_0_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}

/**
 * sdma_v3_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_lock(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_unlock_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_wb_free(adev, index);

	return r;
}

/**
 * sdma_v3_0_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err1;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err1;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err1;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
err1:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err0:
	amdgpu_wb_free(adev, index);
	return r;
}

/**
 * sdma_v3_0_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (VI).
 */
static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0x1FFFF8)
			bytes = 0x1FFFF8;

		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = bytes;
		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(src);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}

/**
 * sdma_v3_0_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using sDMA (VI).
 */
static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
				   uint64_t pe,
				   uint64_t addr, unsigned count,
				   uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = ndw;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & AMDGPU_PTE_SYSTEM) {
				value = amdgpu_vm_map_gart(ib->ring->adev, addr);
				value &= 0xFFFFFFFFFFFFF000ULL;
			} else if (flags & AMDGPU_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * sdma_v3_0_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (VI).
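 * Uses the GEN_PTEPDE packet, which generates up to 0x7FFFF consecutive
 * entries per packet, stepping the written address by @incr for each entry.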
 */
static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count;
		if (ndw > 0x7FFFF)
			ndw = 0x7FFFF;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = ndw; /* number of entries */

		pe += ndw * 8;
		addr += ndw * incr;
		count -= ndw;
	}
}

/**
 * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 */
static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v3_0_ring_emit_vm_flush - vi vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm: amdgpu_vm pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
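 * VM IDs 0-7 use the VM_CONTEXT0..7 page table base registers and VM IDs
 * 8-15 use the VM_CONTEXT8..15 bank; the TLB flush is then requested
 * through VM_INVALIDATE_REQUEST and followed by a POLL_REGMEM packet to
 * wait for it.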
 */
static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	if (vm_id < 8) {
		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* flush TLB */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for flush */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* reference */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

static int sdma_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_STONEY:
		adev->sdma.num_instances = 1;
		break;
	default:
		adev->sdma.num_instances = SDMA_MAX_INSTANCE;
		break;
	}

	sdma_v3_0_set_ring_funcs(adev);
	sdma_v3_0_set_buffer_funcs(adev);
	sdma_v3_0_set_vm_pte_funcs(adev);
	sdma_v3_0_set_irq_funcs(adev);

	return 0;
}

static int sdma_v3_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	r = sdma_v3_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = (i == 0) ?
			AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;

		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 256 * 1024,
				     SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
				     AMDGPU_RING_TYPE_SDMA);
		if (r)
			return r;
	}

	return r;
}

static int sdma_v3_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int sdma_v3_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v3_0_init_golden_registers(adev);

	r = sdma_v3_0_start(adev);
	if (r)
		return r;

	return r;
}

static int sdma_v3_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v3_0_ctx_switch_enable(adev, false);
	sdma_v3_0_enable(adev, false);

	return 0;
}

static int sdma_v3_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v3_0_hw_fini(adev);
}

static int sdma_v3_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v3_0_hw_init(adev);
}

static bool sdma_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
		   SRBM_STATUS2__SDMA1_BUSY_MASK))
		return false;

	return true;
}

static int sdma_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
				SRBM_STATUS2__SDMA1_BUSY_MASK);

		if (!tmp)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static void sdma_v3_0_print_status(void *handle)
{
	int i, j;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "VI SDMA registers\n");
	dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(mmSRBM_STATUS2));
	for (i = 0; i < adev->sdma.num_instances; i++) {
		dev_info(adev->dev, "  SDMA%d_STATUS_REG=0x%08X\n",
			 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_F32_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_IB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_WPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_DOORBELL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			dev_info(adev->dev, "  VM %d:\n", j);
			dev_info(adev->dev, "  SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i]));
			dev_info(adev->dev, "  SDMA%d_GFX_APE1_CNTL=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i]));
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}
}

static int sdma_v3_0_soft_reset(void *handle)
{
	u32 srbm_soft_reset = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
		/* sdma0 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
	}
	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
		/* sdma1 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
	}

	if (srbm_soft_reset) {
		sdma_v3_0_print_status((void *)adev);

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);

		sdma_v3_0_print_status((void *)adev);
	}

	return 0;
}

static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	u8 instance_id, queue_id;

	instance_id = (entry->ring_id & 0x3) >> 0;
	queue_id = (entry->ring_id & 0xc) >> 2;
	DRM_DEBUG("IH: SDMA trap\n");
	switch (instance_id) {
	case 0:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	case 1:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int sdma_v3_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	return 0;
}

static int sdma_v3_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
	.early_init = sdma_v3_0_early_init,
	.late_init = NULL,
	.sw_init = sdma_v3_0_sw_init,
	.sw_fini = sdma_v3_0_sw_fini,
	.hw_init = sdma_v3_0_hw_init,
	.hw_fini = sdma_v3_0_hw_fini,
	.suspend = sdma_v3_0_suspend,
	.resume = sdma_v3_0_resume,
	.is_idle = sdma_v3_0_is_idle,
	.wait_for_idle = sdma_v3_0_wait_for_idle,
	.soft_reset = sdma_v3_0_soft_reset,
	.print_status = sdma_v3_0_print_status,
	.set_clockgating_state = sdma_v3_0_set_clockgating_state,
	.set_powergating_state = sdma_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
	.get_rptr = sdma_v3_0_ring_get_rptr,
	.get_wptr = sdma_v3_0_ring_get_wptr,
	.set_wptr = sdma_v3_0_ring_set_wptr,
	.parse_cs = NULL,
	.emit_ib = sdma_v3_0_ring_emit_ib,
	.emit_fence = sdma_v3_0_ring_emit_fence,
	.emit_semaphore = sdma_v3_0_ring_emit_semaphore,
	.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
	.test_ring = sdma_v3_0_ring_test_ring,
	.test_ib = sdma_v3_0_ring_test_ib,
	.insert_nop = sdma_v3_0_ring_insert_nop,
};

static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
}

static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
	.set = sdma_v3_0_set_trap_irq_state,
	.process = sdma_v3_0_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = {
	.process = sdma_v3_0_process_illegal_inst_irq,
};

static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count;
}

static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v3_0_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
};

static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
	.copy_pte = sdma_v3_0_vm_copy_pte,
	.write_pte = sdma_v3_0_vm_write_pte,
	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
	.pad_ib = sdma_v3_0_vm_pad_ib,
};

static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
		adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
	}
}