/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "vi.h"
#include "vid.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "tonga_sdma_pkt_open.h"

static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("amdgpu/tonga_sdma.bin");
MODULE_FIRMWARE("amdgpu/tonga_sdma1.bin");
MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin");
MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
MODULE_FIRMWARE("amdgpu/fiji_sdma.bin");
MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin");
MODULE_FIRMWARE("amdgpu/stoney_sdma.bin");

static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	SDMA0_REGISTER_OFFSET,
	SDMA1_REGISTER_OFFSET
};

/* golden register tables: { register, and_mask, or_value } triplets */
static const u32 golden_settings_tonga_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 golden_settings_fiji_a10[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 cz_golden_settings_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA1_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 stoney_golden_settings_a11[] =
{
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xffffffff, 0x00000100,
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */

static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		break;
	default:
		break;
	}
}

/**
 * sdma_v3_0_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default: BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;

		if (adev->firmware.smu_load) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}
out:
	if (err) {
		printk(KERN_ERR
		       "sdma_v3_0: Failed to load firmware \"%s\"\n",
		       fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}

/**
 * sdma_v3_0_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VI+).
 */
static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	u32 rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;

	return rptr;
}

/**
 * sdma_v3_0_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
	} else {
		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;

		wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
	}

	return wptr;
}

/**
 * sdma_v3_0_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VI+).
 */
static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr << 2;
		WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
	} else {
		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;

		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
	}
}

static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->nop);
}

/**
 * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib)
{
	u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
	u32 next_rptr = ring->wptr + 5;

	while ((next_rptr & 7) != 2)
		next_rptr++;
	next_rptr += 6;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, next_rptr);

	/* IB packet must end on an 8 DW boundary */
	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}

/**
 * sdma_v3_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	if (ring == &ring->adev->sdma.instance[0].ring)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

/**
 * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address the fence sequence number is written to
 * @seq: sequence number to write
 * @flags: fence flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @semaphore: amdgpu semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (VI).
 */
static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
					  struct amdgpu_semaphore *semaphore,
					  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 sig = emit_wait ? 0 : 1;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
			  SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
	amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
	amdgpu_ring_write(ring, upper_32_bits(addr));

	return true;
}

/**
 * sdma_v3_0_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
 */
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v3_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v3_0_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch (VI).
 */
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 1);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 0);
		WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v3_0_gfx_stop(adev);
		sdma_v3_0_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	u32 doorbell;
	int i, j, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			/* SDMA GFX */
			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);

		doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]);

		if (ring->use_doorbell) {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL,
						 OFFSET, ring->doorbell_index);
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
		} else {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
		}
		WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		ring->ready = true;

		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
	}

	return 0;
}

/**
 * sdma_v3_0_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}

/**
 * sdma_v3_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v3_0_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
		for (j = 0; j < fw_size; j++)
			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v3_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_start(struct amdgpu_device *adev)
{
	int r, i;

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			r = sdma_v3_0_load_microcode(adev);
			if (r)
				return r;
		} else {
			for (i = 0; i < adev->sdma.num_instances; i++) {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						(i == 0) ?
						AMDGPU_UCODE_ID_SDMA0 :
						AMDGPU_UCODE_ID_SDMA1);
				if (r)
					return -EINVAL;
			}
		}
	}

	/* unhalt the MEs */
	sdma_v3_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v3_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v3_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v3_0_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}

/**
 * sdma_v3_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_lock(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_unlock_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_wb_free(adev, index);

	return r;
}

/**
 * sdma_v3_0_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err1;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err1;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err1;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
err1:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err0:
	amdgpu_wb_free(adev, index);
	return r;
}

/**
 * sdma_v3_0_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (VI).
 */
static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0x1FFFF8)
			bytes = 0x1FFFF8;

		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = bytes;
		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(src);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}

/**
 * sdma_v3_0_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using sDMA (VI).
 */
static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
				   uint64_t pe,
				   uint64_t addr, unsigned count,
				   uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = ndw;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & AMDGPU_PTE_SYSTEM) {
				value = amdgpu_vm_map_gart(ib->ring->adev, addr);
				value &= 0xFFFFFFFFFFFFF000ULL;
			} else if (flags & AMDGPU_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * sdma_v3_0_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (VI).
 */
static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count;
		if (ndw > 0x7FFFF)
			ndw = 0x7FFFF;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = ndw; /* number of entries */

		pe += ndw * 8;
		addr += ndw * incr;
		count -= ndw;
	}
}

/**
 * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 */
static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v3_0_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VMID to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
 */
static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	if (vm_id < 8) {
		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* flush TLB */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for flush */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* reference */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

static int sdma_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_STONEY:
		adev->sdma.num_instances = 1;
		break;
	default:
		adev->sdma.num_instances = SDMA_MAX_INSTANCE;
		break;
	}

	sdma_v3_0_set_ring_funcs(adev);
	sdma_v3_0_set_buffer_funcs(adev);
	sdma_v3_0_set_vm_pte_funcs(adev);
	sdma_v3_0_set_irq_funcs(adev);

	return 0;
}

static int sdma_v3_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	/* SDMA Privileged inst */
	r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
	if (r)
		return r;

	r = sdma_v3_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = (i == 0) ?
			AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;

		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 256 * 1024,
				     SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
				     AMDGPU_RING_TYPE_SDMA);
		if (r)
			return r;
	}

	return r;
}

static int sdma_v3_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int sdma_v3_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v3_0_init_golden_registers(adev);

	r = sdma_v3_0_start(adev);
	if (r)
		return r;

	return r;
}

static int sdma_v3_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v3_0_ctx_switch_enable(adev, false);
	sdma_v3_0_enable(adev, false);

	return 0;
}

static int sdma_v3_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v3_0_hw_fini(adev);
}

static int sdma_v3_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v3_0_hw_init(adev);
}

static bool sdma_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
		   SRBM_STATUS2__SDMA1_BUSY_MASK))
		return false;

	return true;
}

static int sdma_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
				SRBM_STATUS2__SDMA1_BUSY_MASK);

		if (!tmp)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static void sdma_v3_0_print_status(void *handle)
{
	int i, j;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "VI SDMA registers\n");
	dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(mmSRBM_STATUS2));
	for (i = 0; i < adev->sdma.num_instances; i++) {
		dev_info(adev->dev, "  SDMA%d_STATUS_REG=0x%08X\n",
			 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_F32_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_IB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_WPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_DOORBELL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			dev_info(adev->dev, "  VM %d:\n", j);
			dev_info(adev->dev, "  SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i]));
			dev_info(adev->dev, "  SDMA%d_GFX_APE1_CNTL=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i]));
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}
}

static int sdma_v3_0_soft_reset(void *handle)
{
	u32 srbm_soft_reset = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
		/* sdma0 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
	}
	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
		/* sdma1 */
		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
	}

	if (srbm_soft_reset) {
		sdma_v3_0_print_status((void *)adev);

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);

		sdma_v3_0_print_status((void *)adev);
	}

	return 0;
}

static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	u8 instance_id, queue_id;

	instance_id = (entry->ring_id & 0x3) >> 0;
	queue_id = (entry->ring_id & 0xc) >> 2;
	DRM_DEBUG("IH: SDMA trap\n");
	switch (instance_id) {
	case 0:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	case 1:
		switch (queue_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static void fiji_update_sdma_medium_grain_clock_gating(
		struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	if (enable) {
		temp = data = RREG32(mmSDMA0_CLK_CTRL);
		data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
		if (data != temp)
			WREG32(mmSDMA0_CLK_CTRL, data);

		temp = data = RREG32(mmSDMA1_CLK_CTRL);
		data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);

		if (data != temp)
			WREG32(mmSDMA1_CLK_CTRL, data);
	} else {
		temp = data = RREG32(mmSDMA0_CLK_CTRL);
		data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK;

		if (data != temp)
			WREG32(mmSDMA0_CLK_CTRL, data);

		temp = data = RREG32(mmSDMA1_CLK_CTRL);
		data |= SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
			SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK;

		if (data != temp)
			WREG32(mmSDMA1_CLK_CTRL, data);
	}
}

static void fiji_update_sdma_medium_grain_light_sleep(
		struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	if (enable) {
		temp = data = RREG32(mmSDMA0_POWER_CNTL);
		data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;

		if (temp != data)
			WREG32(mmSDMA0_POWER_CNTL, data);

		temp = data = RREG32(mmSDMA1_POWER_CNTL);
		data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;

		if (temp != data)
			WREG32(mmSDMA1_POWER_CNTL, data);
	} else {
		temp = data = RREG32(mmSDMA0_POWER_CNTL);
		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;

		if (temp != data)
			WREG32(mmSDMA0_POWER_CNTL, data);

		temp = data = RREG32(mmSDMA1_POWER_CNTL);
		data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;

		if (temp != data)
			WREG32(mmSDMA1_POWER_CNTL, data);
	}
}

static int sdma_v3_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
		fiji_update_sdma_medium_grain_clock_gating(adev,
				state == AMD_CG_STATE_GATE ? true : false);
		fiji_update_sdma_medium_grain_light_sleep(adev,
				state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}

static int sdma_v3_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
	.early_init = sdma_v3_0_early_init,
	.late_init = NULL,
	.sw_init = sdma_v3_0_sw_init,
	.sw_fini = sdma_v3_0_sw_fini,
	.hw_init = sdma_v3_0_hw_init,
	.hw_fini = sdma_v3_0_hw_fini,
	.suspend = sdma_v3_0_suspend,
	.resume = sdma_v3_0_resume,
	.is_idle = sdma_v3_0_is_idle,
	.wait_for_idle = sdma_v3_0_wait_for_idle,
	.soft_reset = sdma_v3_0_soft_reset,
	.print_status = sdma_v3_0_print_status,
	.set_clockgating_state = sdma_v3_0_set_clockgating_state,
	.set_powergating_state = sdma_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
	.get_rptr = sdma_v3_0_ring_get_rptr,
	.get_wptr = sdma_v3_0_ring_get_wptr,
	.set_wptr = sdma_v3_0_ring_set_wptr,
	.parse_cs = NULL,
	.emit_ib = sdma_v3_0_ring_emit_ib,
	.emit_fence = sdma_v3_0_ring_emit_fence,
	.emit_semaphore = sdma_v3_0_ring_emit_semaphore,
	.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
	.test_ring = sdma_v3_0_ring_test_ring,
	.test_ib = sdma_v3_0_ring_test_ib,
	.insert_nop = sdma_v3_0_ring_insert_nop,
};

static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
}

static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
	.set = sdma_v3_0_set_trap_irq_state,
	.process = sdma_v3_0_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = {
	.process = sdma_v3_0_process_illegal_inst_irq,
};

static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count;
}

static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v3_0_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
};

static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
	.copy_pte = sdma_v3_0_vm_copy_pte,
	.write_pte = sdma_v3_0_vm_write_pte,
	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
	.pad_ib = sdma_v3_0_vm_pad_ib,
};

static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
		adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
	}
}