1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Alex Deucher 23 */ 24 #include <linux/firmware.h> 25 #include <drm/drmP.h> 26 #include "amdgpu.h" 27 #include "amdgpu_ucode.h" 28 #include "amdgpu_trace.h" 29 #include "vi.h" 30 #include "vid.h" 31 32 #include "oss/oss_3_0_d.h" 33 #include "oss/oss_3_0_sh_mask.h" 34 35 #include "gmc/gmc_8_1_d.h" 36 #include "gmc/gmc_8_1_sh_mask.h" 37 38 #include "gca/gfx_8_0_d.h" 39 #include "gca/gfx_8_0_enum.h" 40 #include "gca/gfx_8_0_sh_mask.h" 41 42 #include "bif/bif_5_0_d.h" 43 #include "bif/bif_5_0_sh_mask.h" 44 45 #include "tonga_sdma_pkt_open.h" 46 47 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev); 48 static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev); 49 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev); 50 static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev); 51 52 MODULE_FIRMWARE("amdgpu/tonga_sdma.bin"); 53 MODULE_FIRMWARE("amdgpu/tonga_sdma1.bin"); 54 MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin"); 55 MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin"); 56 MODULE_FIRMWARE("amdgpu/fiji_sdma.bin"); 57 MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin"); 58 MODULE_FIRMWARE("amdgpu/stoney_sdma.bin"); 59 MODULE_FIRMWARE("amdgpu/polaris10_sdma.bin"); 60 MODULE_FIRMWARE("amdgpu/polaris10_sdma1.bin"); 61 MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); 62 MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); 63 64 65 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 66 { 67 SDMA0_REGISTER_OFFSET, 68 SDMA1_REGISTER_OFFSET 69 }; 70 71 static const u32 golden_settings_tonga_a11[] = 72 { 73 mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, 74 mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, 75 mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, 76 mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, 77 mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100, 78 mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, 79 mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, 80 mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100, 81 mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100, 82 mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100, 83 }; 84 85 static const u32 tonga_mgcg_cgcg_init[] = 86 { 87 mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100, 88 mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 89 }; 90 91 static const u32 golden_settings_fiji_a10[] = 92 { 93 mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, 94 mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, 95 mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, 96 
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
};

static const u32 cz_golden_settings_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA1_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA1_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

static const u32 stoney_golden_settings_a11[] =
{
	mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
	mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
	mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xffffffff, 0x00000100,
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
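 *
 * For illustration only (not part of the driver flow): a minimal,
 * hand-rolled sDMA packet that writes one dword to a GPU address,
 * mirroring what sdma_v3_0_ring_test_ring() below emits.  The helpers
 * are the ones used throughout this file; the ring and gpu_addr values
 * are assumed to have been set up by the caller:
 *
 *   amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 *                     SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
 *   amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
 *   amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
 *   amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
 *   amdgpu_ring_write(ring, 0xDEADBEEF);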
187 */ 188 189 static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) 190 { 191 switch (adev->asic_type) { 192 case CHIP_FIJI: 193 amdgpu_program_register_sequence(adev, 194 fiji_mgcg_cgcg_init, 195 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); 196 amdgpu_program_register_sequence(adev, 197 golden_settings_fiji_a10, 198 (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); 199 break; 200 case CHIP_TONGA: 201 amdgpu_program_register_sequence(adev, 202 tonga_mgcg_cgcg_init, 203 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); 204 amdgpu_program_register_sequence(adev, 205 golden_settings_tonga_a11, 206 (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); 207 break; 208 case CHIP_POLARIS11: 209 amdgpu_program_register_sequence(adev, 210 golden_settings_polaris11_a11, 211 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); 212 break; 213 case CHIP_POLARIS10: 214 amdgpu_program_register_sequence(adev, 215 golden_settings_polaris10_a11, 216 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); 217 break; 218 case CHIP_CARRIZO: 219 amdgpu_program_register_sequence(adev, 220 cz_mgcg_cgcg_init, 221 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); 222 amdgpu_program_register_sequence(adev, 223 cz_golden_settings_a11, 224 (const u32)ARRAY_SIZE(cz_golden_settings_a11)); 225 break; 226 case CHIP_STONEY: 227 amdgpu_program_register_sequence(adev, 228 stoney_mgcg_cgcg_init, 229 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); 230 amdgpu_program_register_sequence(adev, 231 stoney_golden_settings_a11, 232 (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); 233 break; 234 default: 235 break; 236 } 237 } 238 239 /** 240 * sdma_v3_0_init_microcode - load ucode images from disk 241 * 242 * @adev: amdgpu_device pointer 243 * 244 * Use the firmware interface to load the ucode images into 245 * the driver (not loaded into hw). 246 * Returns 0 on success, error on failure. 
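 *
 * The ucode files follow the naming scheme of the MODULE_FIRMWARE()
 * declarations above.  A condensed illustration (the chip name and the
 * single-format snprintf() are assumptions; the function below uses one
 * snprintf() per instance):
 *
 *   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%s.bin",
 *            "tonga", (i == 0) ? "" : "1");
 *
 * which yields "amdgpu/tonga_sdma.bin" for SDMA0 and
 * "amdgpu/tonga_sdma1.bin" for SDMA1.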
247 */ 248 static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) 249 { 250 const char *chip_name; 251 char fw_name[30]; 252 int err = 0, i; 253 struct amdgpu_firmware_info *info = NULL; 254 const struct common_firmware_header *header = NULL; 255 const struct sdma_firmware_header_v1_0 *hdr; 256 257 DRM_DEBUG("\n"); 258 259 switch (adev->asic_type) { 260 case CHIP_TONGA: 261 chip_name = "tonga"; 262 break; 263 case CHIP_FIJI: 264 chip_name = "fiji"; 265 break; 266 case CHIP_POLARIS11: 267 chip_name = "polaris11"; 268 break; 269 case CHIP_POLARIS10: 270 chip_name = "polaris10"; 271 break; 272 case CHIP_CARRIZO: 273 chip_name = "carrizo"; 274 break; 275 case CHIP_STONEY: 276 chip_name = "stoney"; 277 break; 278 default: BUG(); 279 } 280 281 for (i = 0; i < adev->sdma.num_instances; i++) { 282 if (i == 0) 283 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 284 else 285 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); 286 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); 287 if (err) 288 goto out; 289 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); 290 if (err) 291 goto out; 292 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 293 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 294 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 295 if (adev->sdma.instance[i].feature_version >= 20) 296 adev->sdma.instance[i].burst_nop = true; 297 298 if (adev->firmware.smu_load) { 299 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 300 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 301 info->fw = adev->sdma.instance[i].fw; 302 header = (const struct common_firmware_header *)info->fw->data; 303 adev->firmware.fw_size += 304 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 305 } 306 } 307 out: 308 if (err) { 309 printk(KERN_ERR 310 "sdma_v3_0: Failed to load firmware \"%s\"\n", 311 fw_name); 312 for (i = 0; i < adev->sdma.num_instances; i++) { 313 release_firmware(adev->sdma.instance[i].fw); 314 adev->sdma.instance[i].fw = NULL; 315 } 316 } 317 return err; 318 } 319 320 /** 321 * sdma_v3_0_ring_get_rptr - get the current read pointer 322 * 323 * @ring: amdgpu ring pointer 324 * 325 * Get the current rptr from the hardware (VI+). 326 */ 327 static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) 328 { 329 u32 rptr; 330 331 /* XXX check if swapping is necessary on BE */ 332 rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2; 333 334 return rptr; 335 } 336 337 /** 338 * sdma_v3_0_ring_get_wptr - get the current write pointer 339 * 340 * @ring: amdgpu ring pointer 341 * 342 * Get the current wptr from the hardware (VI+). 343 */ 344 static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) 345 { 346 struct amdgpu_device *adev = ring->adev; 347 u32 wptr; 348 349 if (ring->use_doorbell) { 350 /* XXX check if swapping is necessary on BE */ 351 wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; 352 } else { 353 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; 354 355 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; 356 } 357 358 return wptr; 359 } 360 361 /** 362 * sdma_v3_0_ring_set_wptr - commit the write pointer 363 * 364 * @ring: amdgpu ring pointer 365 * 366 * Write the wptr back to the hardware (VI+). 
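 *
 * The software write pointer counts ring dwords while the hardware
 * register/doorbell takes a byte offset, hence the "<< 2" below.  A
 * sketch of the two commit paths, matching the function body (me
 * selects the SDMA instance as in the code):
 *
 *   if (ring->use_doorbell)
 *           WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
 *   else
 *           WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);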
 */
static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr << 2;
		WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
	} else {
		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;

		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
	}
}

static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->nop);
}

/**
 * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 * @vm_id: VM ID the IB belongs to
 * @ctx_switch: whether this submission switches context
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib,
				   unsigned vm_id, bool ctx_switch)
{
	u32 vmid = vm_id & 0xf;
	u32 next_rptr = ring->wptr + 5;

	while ((next_rptr & 7) != 2)
		next_rptr++;
	next_rptr += 6;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, next_rptr);

	/* IB packet must end on an 8 DW boundary */
	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}

/**
 * sdma_v3_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
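 *
 * The flush is built from a single POLL_REGMEM packet: with the
 * hdp_flush bit set in the header, the engine kicks GPU_HDP_FLUSH_REQ
 * and polls GPU_HDP_FLUSH_DONE until (value & mask) matches the
 * reference with FUNC 3 ("equal").  For reference, the six dwords
 * emitted by the function below are:
 *
 *   dw0: header (SDMA_OP_POLL_REGMEM, hdp_flush = 1, func = 3)
 *   dw1: mmGPU_HDP_FLUSH_DONE << 2
 *   dw2: mmGPU_HDP_FLUSH_REQ << 2
 *   dw3: reference (SDMA0 or SDMA1 done bit)
 *   dw4: mask (same bit)
 *   dw5: retry count and poll interval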
 */
static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	if (ring == &ring->adev->sdma.instance[0].ring)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 1);
}

/**
 * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence sequence number to
 * @seq: fence sequence number
 * @flags: fence flags, e.g. AMDGPU_FENCE_FLAG_64BIT
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

unsigned init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr; /* this is the offset we need to patch later */
	amdgpu_ring_write(ring, 0x55aa55aa); /* insert dummy here and patch it later */
	return ret;
}

void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = ring->wptr - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

/**
 * sdma_v3_0_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
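 *
 * Stopping is a read-modify-write of the per-instance RB/IB control
 * registers via REG_SET_FIELD().  Conceptually, for illustration,
 *
 *   rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
 *
 * clears the bits selected by the corresponding _MASK definition and
 * inserts the new value at the corresponding __SHIFT, both coming from
 * the sh_mask headers included at the top of this file.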
 */
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}

/**
 * sdma_v3_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v3_0_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch (VI).
 */
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 1);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, 0);
		WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_enable - halt or unhalt the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v3_0_gfx_stop(adev);
		sdma_v3_0_rlc_stop(adev);
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
		if (enable)
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		else
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
	}
}

/**
 * sdma_v3_0_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
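 *
 * RB_SIZE is programmed as log2 of the ring size in dwords.  A small
 * worked example with an assumed 256KB ring buffer:
 *
 *   rb_bufsz = order_base_2(0x40000 / 4);
 *
 * 0x40000 bytes is 0x10000 (64K) dwords, so rb_bufsz is 16, which is
 * the value written into the RB_SIZE field of SDMA0_GFX_RB_CNTL below.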
632 */ 633 static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) 634 { 635 struct amdgpu_ring *ring; 636 u32 rb_cntl, ib_cntl; 637 u32 rb_bufsz; 638 u32 wb_offset; 639 u32 doorbell; 640 int i, j, r; 641 642 for (i = 0; i < adev->sdma.num_instances; i++) { 643 ring = &adev->sdma.instance[i].ring; 644 wb_offset = (ring->rptr_offs * 4); 645 646 mutex_lock(&adev->srbm_mutex); 647 for (j = 0; j < 16; j++) { 648 vi_srbm_select(adev, 0, 0, 0, j); 649 /* SDMA GFX */ 650 WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0); 651 WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0); 652 } 653 vi_srbm_select(adev, 0, 0, 0, 0); 654 mutex_unlock(&adev->srbm_mutex); 655 656 WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], 657 adev->gfx.config.gb_addr_config & 0x70); 658 659 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 660 661 /* Set ring buffer size in dwords */ 662 rb_bufsz = order_base_2(ring->ring_size / 4); 663 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 664 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); 665 #ifdef __BIG_ENDIAN 666 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); 667 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 668 RPTR_WRITEBACK_SWAP_ENABLE, 1); 669 #endif 670 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 671 672 /* Initialize the ring buffer's read and write pointers */ 673 WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); 674 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); 675 676 /* set the wb address whether it's enabled or not */ 677 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], 678 upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 679 WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], 680 lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); 681 682 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 683 684 WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 685 WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); 686 687 ring->wptr = 0; 688 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); 689 690 doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); 691 692 if (ring->use_doorbell) { 693 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, 694 OFFSET, ring->doorbell_index); 695 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); 696 } else { 697 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); 698 } 699 WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell); 700 701 /* enable DMA RB */ 702 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 703 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 704 705 ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]); 706 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); 707 #ifdef __BIG_ENDIAN 708 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); 709 #endif 710 /* enable DMA IBs */ 711 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 712 713 ring->ready = true; 714 715 r = amdgpu_ring_test_ring(ring); 716 if (r) { 717 ring->ready = false; 718 return r; 719 } 720 721 if (adev->mman.buffer_funcs_ring == ring) 722 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 723 } 724 725 return 0; 726 } 727 728 /** 729 * sdma_v3_0_rlc_resume - setup and start the async dma engines 730 * 731 * @adev: amdgpu_device pointer 732 * 733 * Set up the compute DMA queues and enable them (VI). 
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}

/**
 * sdma_v3_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v3_0_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
		for (j = 0; j < fw_size; j++)
			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v3_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v3_0_start(struct amdgpu_device *adev)
{
	int r, i;

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			r = sdma_v3_0_load_microcode(adev);
			if (r)
				return r;
		} else {
			for (i = 0; i < adev->sdma.num_instances; i++) {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						(i == 0) ?
						AMDGPU_UCODE_ID_SDMA0 :
						AMDGPU_UCODE_ID_SDMA1);
				if (r)
					return -EINVAL;
			}
		}
	}

	/* unhalt the MEs */
	sdma_v3_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v3_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v3_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v3_0_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}

/**
 * sdma_v3_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
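 *
 * For reference, the test reserves exactly 5 ring dwords because the
 * WRITE_LINEAR packet it emits is 5 dwords long:
 *
 *   dw0: header (SDMA_OP_WRITE, SDMA_SUBOP_WRITE_LINEAR)
 *   dw1: destination address, low 32 bits
 *   dw2: destination address, high 32 bits
 *   dw3: dword count (SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1))
 *   dw4: payload (0xDEADBEEF)
 *
 * The CPU then polls the writeback slot for up to adev->usec_timeout
 * microseconds until the payload shows up.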
831 */ 832 static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) 833 { 834 struct amdgpu_device *adev = ring->adev; 835 unsigned i; 836 unsigned index; 837 int r; 838 u32 tmp; 839 u64 gpu_addr; 840 841 r = amdgpu_wb_get(adev, &index); 842 if (r) { 843 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 844 return r; 845 } 846 847 gpu_addr = adev->wb.gpu_addr + (index * 4); 848 tmp = 0xCAFEDEAD; 849 adev->wb.wb[index] = cpu_to_le32(tmp); 850 851 r = amdgpu_ring_alloc(ring, 5); 852 if (r) { 853 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 854 amdgpu_wb_free(adev, index); 855 return r; 856 } 857 858 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 859 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 860 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 861 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 862 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); 863 amdgpu_ring_write(ring, 0xDEADBEEF); 864 amdgpu_ring_commit(ring); 865 866 for (i = 0; i < adev->usec_timeout; i++) { 867 tmp = le32_to_cpu(adev->wb.wb[index]); 868 if (tmp == 0xDEADBEEF) 869 break; 870 DRM_UDELAY(1); 871 } 872 873 if (i < adev->usec_timeout) { 874 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 875 } else { 876 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", 877 ring->idx, tmp); 878 r = -EINVAL; 879 } 880 amdgpu_wb_free(adev, index); 881 882 return r; 883 } 884 885 /** 886 * sdma_v3_0_ring_test_ib - test an IB on the DMA engine 887 * 888 * @ring: amdgpu_ring structure holding ring information 889 * 890 * Test a simple IB in the DMA ring (VI). 891 * Returns 0 on success, error on failure. 892 */ 893 static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) 894 { 895 struct amdgpu_device *adev = ring->adev; 896 struct amdgpu_ib ib; 897 struct fence *f = NULL; 898 unsigned i; 899 unsigned index; 900 int r; 901 u32 tmp = 0; 902 u64 gpu_addr; 903 904 r = amdgpu_wb_get(adev, &index); 905 if (r) { 906 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 907 return r; 908 } 909 910 gpu_addr = adev->wb.gpu_addr + (index * 4); 911 tmp = 0xCAFEDEAD; 912 adev->wb.wb[index] = cpu_to_le32(tmp); 913 memset(&ib, 0, sizeof(ib)); 914 r = amdgpu_ib_get(adev, NULL, 256, &ib); 915 if (r) { 916 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 917 goto err0; 918 } 919 920 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 921 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 922 ib.ptr[1] = lower_32_bits(gpu_addr); 923 ib.ptr[2] = upper_32_bits(gpu_addr); 924 ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1); 925 ib.ptr[4] = 0xDEADBEEF; 926 ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 927 ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 928 ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 929 ib.length_dw = 8; 930 931 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 932 if (r) 933 goto err1; 934 935 r = fence_wait(f, false); 936 if (r) { 937 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 938 goto err1; 939 } 940 for (i = 0; i < adev->usec_timeout; i++) { 941 tmp = le32_to_cpu(adev->wb.wb[index]); 942 if (tmp == 0xDEADBEEF) 943 break; 944 DRM_UDELAY(1); 945 } 946 if (i < adev->usec_timeout) { 947 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", 948 ring->idx, i); 949 goto err1; 950 } else { 951 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 952 r = -EINVAL; 953 } 954 err1: 955 fence_put(f); 956 amdgpu_ib_free(adev, &ib, NULL); 957 fence_put(f); 958 err0: 959 amdgpu_wb_free(adev, index); 960 return r; 961 } 962 963 /** 964 
 * sdma_v3_0_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (VI).
 */
static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0x1FFFF8)
			bytes = 0x1FFFF8;

		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = bytes;
		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(src);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}

/**
 * sdma_v3_0_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pages_addr: DMA addresses to use for mapping
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using sDMA (VI).
 */
static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
				   const dma_addr_t *pages_addr, uint64_t pe,
				   uint64_t addr, unsigned count,
				   uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = ndw;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			value = amdgpu_vm_map_gart(pages_addr, addr);
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * sdma_v3_0_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (VI).
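 *
 * Each GEN_PTEPDE packet emitted below is 10 dwords and covers at most
 * 0x7FFFF entries.  As an illustration with an assumed request of
 * 0x90000 entries, the loop splits it into two packets of 0x7FFFF and
 * 0x10001 entries, advancing pe by ndw * 8 bytes and addr by
 * ndw * incr between packets.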
 */
static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count;
		if (ndw > 0x7FFFF)
			ndw = 0x7FFFF;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = ndw; /* number of entries */

		pe += ndw * 8;
		addr += ndw * incr;
		count -= ndw;
	}
}

/**
 * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	u32 pad_count;
	int i;

	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (VI).
 */
static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xfffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v3_0_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM ID to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
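 *
 * The page table base register is selected per VM context: vm_id 0-7
 * uses mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id, vm_id 8-15 uses
 * mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8).  For example, an
 * (assumed) vm_id of 9 selects mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + 1.
 * The page directory address is written as pd_addr >> 12, i.e. a
 * 4KB-aligned frame number.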
1142 */ 1143 static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1144 unsigned vm_id, uint64_t pd_addr) 1145 { 1146 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1147 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1148 if (vm_id < 8) { 1149 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 1150 } else { 1151 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 1152 } 1153 amdgpu_ring_write(ring, pd_addr >> 12); 1154 1155 /* flush TLB */ 1156 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1157 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1158 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 1159 amdgpu_ring_write(ring, 1 << vm_id); 1160 1161 /* wait for flush */ 1162 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1163 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1164 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */ 1165 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); 1166 amdgpu_ring_write(ring, 0); 1167 amdgpu_ring_write(ring, 0); /* reference */ 1168 amdgpu_ring_write(ring, 0); /* mask */ 1169 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1170 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 1171 } 1172 1173 static int sdma_v3_0_early_init(void *handle) 1174 { 1175 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1176 1177 switch (adev->asic_type) { 1178 case CHIP_STONEY: 1179 adev->sdma.num_instances = 1; 1180 break; 1181 default: 1182 adev->sdma.num_instances = SDMA_MAX_INSTANCE; 1183 break; 1184 } 1185 1186 sdma_v3_0_set_ring_funcs(adev); 1187 sdma_v3_0_set_buffer_funcs(adev); 1188 sdma_v3_0_set_vm_pte_funcs(adev); 1189 sdma_v3_0_set_irq_funcs(adev); 1190 1191 return 0; 1192 } 1193 1194 static int sdma_v3_0_sw_init(void *handle) 1195 { 1196 struct amdgpu_ring *ring; 1197 int r, i; 1198 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1199 1200 /* SDMA trap event */ 1201 r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); 1202 if (r) 1203 return r; 1204 1205 /* SDMA Privileged inst */ 1206 r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq); 1207 if (r) 1208 return r; 1209 1210 /* SDMA Privileged inst */ 1211 r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq); 1212 if (r) 1213 return r; 1214 1215 r = sdma_v3_0_init_microcode(adev); 1216 if (r) { 1217 DRM_ERROR("Failed to load sdma firmware!\n"); 1218 return r; 1219 } 1220 1221 for (i = 0; i < adev->sdma.num_instances; i++) { 1222 ring = &adev->sdma.instance[i].ring; 1223 ring->ring_obj = NULL; 1224 ring->use_doorbell = true; 1225 ring->doorbell_index = (i == 0) ? 1226 AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; 1227 1228 sprintf(ring->name, "sdma%d", i); 1229 r = amdgpu_ring_init(adev, ring, 1024, 1230 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, 1231 &adev->sdma.trap_irq, 1232 (i == 0) ? 
1233 AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1, 1234 AMDGPU_RING_TYPE_SDMA); 1235 if (r) 1236 return r; 1237 } 1238 1239 return r; 1240 } 1241 1242 static int sdma_v3_0_sw_fini(void *handle) 1243 { 1244 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1245 int i; 1246 1247 for (i = 0; i < adev->sdma.num_instances; i++) 1248 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1249 1250 return 0; 1251 } 1252 1253 static int sdma_v3_0_hw_init(void *handle) 1254 { 1255 int r; 1256 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1257 1258 sdma_v3_0_init_golden_registers(adev); 1259 1260 r = sdma_v3_0_start(adev); 1261 if (r) 1262 return r; 1263 1264 return r; 1265 } 1266 1267 static int sdma_v3_0_hw_fini(void *handle) 1268 { 1269 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1270 1271 sdma_v3_0_ctx_switch_enable(adev, false); 1272 sdma_v3_0_enable(adev, false); 1273 1274 return 0; 1275 } 1276 1277 static int sdma_v3_0_suspend(void *handle) 1278 { 1279 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1280 1281 return sdma_v3_0_hw_fini(adev); 1282 } 1283 1284 static int sdma_v3_0_resume(void *handle) 1285 { 1286 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1287 1288 return sdma_v3_0_hw_init(adev); 1289 } 1290 1291 static bool sdma_v3_0_is_idle(void *handle) 1292 { 1293 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1294 u32 tmp = RREG32(mmSRBM_STATUS2); 1295 1296 if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK | 1297 SRBM_STATUS2__SDMA1_BUSY_MASK)) 1298 return false; 1299 1300 return true; 1301 } 1302 1303 static int sdma_v3_0_wait_for_idle(void *handle) 1304 { 1305 unsigned i; 1306 u32 tmp; 1307 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1308 1309 for (i = 0; i < adev->usec_timeout; i++) { 1310 tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | 1311 SRBM_STATUS2__SDMA1_BUSY_MASK); 1312 1313 if (!tmp) 1314 return 0; 1315 udelay(1); 1316 } 1317 return -ETIMEDOUT; 1318 } 1319 1320 static int sdma_v3_0_soft_reset(void *handle) 1321 { 1322 u32 srbm_soft_reset = 0; 1323 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1324 u32 tmp = RREG32(mmSRBM_STATUS2); 1325 1326 if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { 1327 /* sdma0 */ 1328 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); 1329 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); 1330 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); 1331 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; 1332 } 1333 if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { 1334 /* sdma1 */ 1335 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); 1336 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); 1337 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); 1338 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; 1339 } 1340 1341 if (srbm_soft_reset) { 1342 tmp = RREG32(mmSRBM_SOFT_RESET); 1343 tmp |= srbm_soft_reset; 1344 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 1345 WREG32(mmSRBM_SOFT_RESET, tmp); 1346 tmp = RREG32(mmSRBM_SOFT_RESET); 1347 1348 udelay(50); 1349 1350 tmp &= ~srbm_soft_reset; 1351 WREG32(mmSRBM_SOFT_RESET, tmp); 1352 tmp = RREG32(mmSRBM_SOFT_RESET); 1353 1354 /* Wait a little for things to settle down */ 1355 udelay(50); 1356 } 1357 1358 return 0; 1359 } 1360 1361 static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev, 1362 struct amdgpu_irq_src *source, 1363 unsigned type, 1364 enum amdgpu_interrupt_state state) 1365 { 1366 u32 sdma_cntl; 1367 1368 switch (type) { 1369 case 
AMDGPU_SDMA_IRQ_TRAP0: 1370 switch (state) { 1371 case AMDGPU_IRQ_STATE_DISABLE: 1372 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); 1373 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0); 1374 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl); 1375 break; 1376 case AMDGPU_IRQ_STATE_ENABLE: 1377 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); 1378 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1); 1379 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl); 1380 break; 1381 default: 1382 break; 1383 } 1384 break; 1385 case AMDGPU_SDMA_IRQ_TRAP1: 1386 switch (state) { 1387 case AMDGPU_IRQ_STATE_DISABLE: 1388 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); 1389 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0); 1390 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl); 1391 break; 1392 case AMDGPU_IRQ_STATE_ENABLE: 1393 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); 1394 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1); 1395 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl); 1396 break; 1397 default: 1398 break; 1399 } 1400 break; 1401 default: 1402 break; 1403 } 1404 return 0; 1405 } 1406 1407 static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev, 1408 struct amdgpu_irq_src *source, 1409 struct amdgpu_iv_entry *entry) 1410 { 1411 u8 instance_id, queue_id; 1412 1413 instance_id = (entry->ring_id & 0x3) >> 0; 1414 queue_id = (entry->ring_id & 0xc) >> 2; 1415 DRM_DEBUG("IH: SDMA trap\n"); 1416 switch (instance_id) { 1417 case 0: 1418 switch (queue_id) { 1419 case 0: 1420 amdgpu_fence_process(&adev->sdma.instance[0].ring); 1421 break; 1422 case 1: 1423 /* XXX compute */ 1424 break; 1425 case 2: 1426 /* XXX compute */ 1427 break; 1428 } 1429 break; 1430 case 1: 1431 switch (queue_id) { 1432 case 0: 1433 amdgpu_fence_process(&adev->sdma.instance[1].ring); 1434 break; 1435 case 1: 1436 /* XXX compute */ 1437 break; 1438 case 2: 1439 /* XXX compute */ 1440 break; 1441 } 1442 break; 1443 } 1444 return 0; 1445 } 1446 1447 static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, 1448 struct amdgpu_irq_src *source, 1449 struct amdgpu_iv_entry *entry) 1450 { 1451 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1452 schedule_work(&adev->reset_work); 1453 return 0; 1454 } 1455 1456 static void sdma_v3_0_update_sdma_medium_grain_clock_gating( 1457 struct amdgpu_device *adev, 1458 bool enable) 1459 { 1460 uint32_t temp, data; 1461 int i; 1462 1463 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 1464 for (i = 0; i < adev->sdma.num_instances; i++) { 1465 temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); 1466 data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1467 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1468 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1469 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1470 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1471 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1472 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1473 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1474 if (data != temp) 1475 WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); 1476 } 1477 } else { 1478 for (i = 0; i < adev->sdma.num_instances; i++) { 1479 temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); 1480 data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1481 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1482 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | 1483 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1484 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1485 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1486 
SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1487 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK; 1488 1489 if (data != temp) 1490 WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); 1491 } 1492 } 1493 } 1494 1495 static void sdma_v3_0_update_sdma_medium_grain_light_sleep( 1496 struct amdgpu_device *adev, 1497 bool enable) 1498 { 1499 uint32_t temp, data; 1500 int i; 1501 1502 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { 1503 for (i = 0; i < adev->sdma.num_instances; i++) { 1504 temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); 1505 data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1506 1507 if (temp != data) 1508 WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); 1509 } 1510 } else { 1511 for (i = 0; i < adev->sdma.num_instances; i++) { 1512 temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); 1513 data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1514 1515 if (temp != data) 1516 WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); 1517 } 1518 } 1519 } 1520 1521 static int sdma_v3_0_set_clockgating_state(void *handle, 1522 enum amd_clockgating_state state) 1523 { 1524 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1525 1526 switch (adev->asic_type) { 1527 case CHIP_FIJI: 1528 case CHIP_CARRIZO: 1529 case CHIP_STONEY: 1530 sdma_v3_0_update_sdma_medium_grain_clock_gating(adev, 1531 state == AMD_CG_STATE_GATE ? true : false); 1532 sdma_v3_0_update_sdma_medium_grain_light_sleep(adev, 1533 state == AMD_CG_STATE_GATE ? true : false); 1534 break; 1535 default: 1536 break; 1537 } 1538 return 0; 1539 } 1540 1541 static int sdma_v3_0_set_powergating_state(void *handle, 1542 enum amd_powergating_state state) 1543 { 1544 return 0; 1545 } 1546 1547 const struct amd_ip_funcs sdma_v3_0_ip_funcs = { 1548 .name = "sdma_v3_0", 1549 .early_init = sdma_v3_0_early_init, 1550 .late_init = NULL, 1551 .sw_init = sdma_v3_0_sw_init, 1552 .sw_fini = sdma_v3_0_sw_fini, 1553 .hw_init = sdma_v3_0_hw_init, 1554 .hw_fini = sdma_v3_0_hw_fini, 1555 .suspend = sdma_v3_0_suspend, 1556 .resume = sdma_v3_0_resume, 1557 .is_idle = sdma_v3_0_is_idle, 1558 .wait_for_idle = sdma_v3_0_wait_for_idle, 1559 .soft_reset = sdma_v3_0_soft_reset, 1560 .set_clockgating_state = sdma_v3_0_set_clockgating_state, 1561 .set_powergating_state = sdma_v3_0_set_powergating_state, 1562 }; 1563 1564 static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { 1565 .get_rptr = sdma_v3_0_ring_get_rptr, 1566 .get_wptr = sdma_v3_0_ring_get_wptr, 1567 .set_wptr = sdma_v3_0_ring_set_wptr, 1568 .parse_cs = NULL, 1569 .emit_ib = sdma_v3_0_ring_emit_ib, 1570 .emit_fence = sdma_v3_0_ring_emit_fence, 1571 .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync, 1572 .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, 1573 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, 1574 .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate, 1575 .test_ring = sdma_v3_0_ring_test_ring, 1576 .test_ib = sdma_v3_0_ring_test_ib, 1577 .insert_nop = sdma_v3_0_ring_insert_nop, 1578 .pad_ib = sdma_v3_0_ring_pad_ib, 1579 }; 1580 1581 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) 1582 { 1583 int i; 1584 1585 for (i = 0; i < adev->sdma.num_instances; i++) 1586 adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs; 1587 } 1588 1589 static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = { 1590 .set = sdma_v3_0_set_trap_irq_state, 1591 .process = sdma_v3_0_process_trap_irq, 1592 }; 1593 1594 static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = { 1595 .process = sdma_v3_0_process_illegal_inst_irq, 1596 
};

static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = byte_count;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count;
}

static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v3_0_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
};

static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
	.copy_pte = sdma_v3_0_vm_copy_pte,
	.write_pte = sdma_v3_0_vm_write_pte,
	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
};

static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
		for (i = 0; i < adev->sdma.num_instances; i++)
			adev->vm_manager.vm_pte_rings[i] =
				&adev->sdma.instance[i].ring;

		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
	}
}
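
/*
 * Note on the buffer_funcs table above, for illustration only: the
 * _num_dw values mirror the packets emitted by the helpers.  A single
 * copy request consumes exactly 7 IB dwords (header, byte count, endian
 * swap, src low/high, dst low/high) and a fill request consumes 5
 * (header, dst low/high, fill value, byte count).  copy_max_bytes and
 * fill_max_bytes (0x1fffff) bound the byte count of one packet; larger
 * transfers are split into multiple packets by the callers in the
 * amdgpu ttm code.
 */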