/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <drm/drm_debugfs.h>

#include "amdgpu.h"

/**
 * amdgpu_debugfs_add_files - Add simple debugfs entries
 *
 * @adev: Device to attach debugfs entries to
 * @files: Array of function callbacks that respond to reads
 * @nfiles: Number of callbacks to register
 *
 * Returns 0 on success (including when the entries were already
 * registered), or -EINVAL if no component slot is left.
 */
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
			     const struct drm_info_list *files,
			     unsigned nfiles)
{
	unsigned i;

	for (i = 0; i < adev->debugfs_count; i++) {
		if (adev->debugfs[i].files == files) {
			/* Already registered */
			return 0;
		}
	}

	i = adev->debugfs_count + 1;
	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
		DRM_ERROR("Reached maximum number of debugfs components.\n");
		DRM_ERROR("Report so we increase AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
		return -EINVAL;
	}
	adev->debugfs[adev->debugfs_count].files = files;
	adev->debugfs[adev->debugfs_count].num_files = nfiles;
	adev->debugfs_count = i;
#if defined(CONFIG_DEBUG_FS)
	drm_debugfs_create_files(files, nfiles,
				 adev->ddev->primary->debugfs_root,
				 adev->ddev->primary);
#endif
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

/**
 * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
 *
 * @read: True if reading
 * @f: open file handle
 * @buf: User buffer to write/read to
 * @size: Number of bytes to write/read
 * @pos: Offset to seek to
 *
 * This debugfs entry has special meaning on the offset being sought.
 * Various bits have different meanings:
 *
 * Bit 62: Indicates a GRBM bank switch is needed
 * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
 *	   zero)
 * Bits 24..33: The SE or ME selector if needed
 * Bits 34..43: The SH (or SA) or PIPE selector if needed
 * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
 *
 * Bit 23: Indicates that the PM power gating lock should be held
 *	   This is necessary to read registers that might be
 *	   unreliable during a power gating transition.
 *
 * The lower bits are the BYTE offset of the register to read. This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
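 *
 * As an illustrative example, to read the register at byte offset
 * 0x100 while switching GRBM banks to SE 1 (SH 0, instance 0), user
 * space would seek to
 *
 *   (1ULL << 62) | (1ULL << 24) | 0x100
 *
 * before calling read(). A selector field value of 0x3FF selects all
 * banks (it is translated to 0xFFFFFFFF below).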
 */
static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
					 char __user *buf, size_t size,
					 loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	bool pm_pg_lock, use_bank, use_ring;
	unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;

	pm_pg_lock = use_bank = use_ring = false;
	instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;

	if (size & 0x3 || *pos & 0x3 ||
	    ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
		return -EINVAL;

	/* are we reading registers for which a PG lock is necessary? */
	pm_pg_lock = (*pos >> 23) & 1;

	if (*pos & (1ULL << 62)) {
		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;

		if (se_bank == 0x3FF)
			se_bank = 0xFFFFFFFF;
		if (sh_bank == 0x3FF)
			sh_bank = 0xFFFFFFFF;
		if (instance_bank == 0x3FF)
			instance_bank = 0xFFFFFFFF;
		use_bank = true;
	} else if (*pos & (1ULL << 61)) {

		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
		pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
		vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;

		use_ring = true;
	} else {
		use_bank = use_ring = false;
	}

	*pos &= (1UL << 22) - 1;

	if (use_bank) {
		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
			return -EINVAL;
		mutex_lock(&adev->grbm_idx_mutex);
		amdgpu_gfx_select_se_sh(adev, se_bank,
					sh_bank, instance_bank);
	} else if (use_ring) {
		mutex_lock(&adev->srbm_mutex);
		amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
	}

	if (pm_pg_lock)
		mutex_lock(&adev->pm.mutex);

	while (size) {
		uint32_t value;

		if (read) {
			value = RREG32(*pos >> 2);
			r = put_user(value, (uint32_t *)buf);
		} else {
			r = get_user(value, (uint32_t *)buf);
			if (!r)
				WREG32(*pos >> 2, value);
		}
		if (r) {
			result = r;
			goto end;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

end:
	if (use_bank) {
		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	} else if (use_ring) {
		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	if (pm_pg_lock)
		mutex_unlock(&adev->pm.mutex);

	return result;
}

/**
 * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
 */
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}

/**
 * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
 */
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}

/**
 * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read.
 * This allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		value = RREG32_PCIE(*pos >> 2);
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write. This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
					      size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		WREG32_PCIE(*pos >> 2, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read. This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		value = RREG32_DIDT(*pos >> 2);
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write. This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
					      size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		WREG32_DIDT(*pos >> 2, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_regs_smc_read - Read from a SMC register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read. This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
					    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		value = RREG32_SMC(*pos);
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_regs_smc_write - Write to a SMC register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos: Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write. This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		WREG32_SMC(*pos, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_gca_config_read - Read from gfx config data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * This file is used to access configuration data in a somewhat
 * stable fashion. The format is a series of DWORDs with the first
 * indicating which revision it is. New content is appended to the
 * end so that older software can still read the data.
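 *
 * A minimal user-space sketch (illustrative only; error handling is
 * omitted and the path assumes the first DRM minor):
 *
 *   int fd = open("/sys/kernel/debug/dri/0/amdgpu_gca_config", O_RDONLY);
 *   uint32_t rev;
 *   read(fd, &rev, sizeof(rev));  // first DWORD is the format revision
 *
 * A reader built against an older revision can simply ignore the
 * trailing fields it does not know about.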
 */
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
					      size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	uint32_t *config, no_regs = 0;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
	if (!config)
		return -ENOMEM;

	/* version, increment each time something is added */
	config[no_regs++] = 3;
	config[no_regs++] = adev->gfx.config.max_shader_engines;
	config[no_regs++] = adev->gfx.config.max_tile_pipes;
	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
	config[no_regs++] = adev->gfx.config.max_sh_per_se;
	config[no_regs++] = adev->gfx.config.max_backends_per_se;
	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
	config[no_regs++] = adev->gfx.config.max_gprs;
	config[no_regs++] = adev->gfx.config.max_gs_threads;
	config[no_regs++] = adev->gfx.config.max_hw_contexts;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.num_tile_pipes;
	config[no_regs++] = adev->gfx.config.backend_enable_mask;
	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
	config[no_regs++] = adev->gfx.config.num_gpus;
	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
	config[no_regs++] = adev->gfx.config.gb_addr_config;
	config[no_regs++] = adev->gfx.config.num_rbs;

	/* rev==1 */
	config[no_regs++] = adev->rev_id;
	config[no_regs++] = adev->pg_flags;
	config[no_regs++] = adev->cg_flags;

	/* rev==2 */
	config[no_regs++] = adev->family;
	config[no_regs++] = adev->external_rev_id;

	/* rev==3 */
	config[no_regs++] = adev->pdev->device;
	config[no_regs++] = adev->pdev->revision;
	config[no_regs++] = adev->pdev->subsystem_device;
	config[no_regs++] = adev->pdev->subsystem_vendor;

	while (size && (*pos < no_regs * 4)) {
		uint32_t value;

		value = config[*pos >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			kfree(config);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	kfree(config);
	return result;
}

/**
 * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The offset is treated as the BYTE address of one of the sensors
 * enumerated in amd/include/kgd_pp_interface.h under the
 * 'amd_pp_sensors' enumeration. For instance, to read the UVD VCLK
 * you would use the offset 3 * 4 = 12.
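 *
 * A minimal user-space sketch (illustrative only; error handling is
 * omitted and the path assumes the first DRM minor):
 *
 *   int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);
 *   uint32_t vclk;
 *   pread(fd, &vclk, sizeof(vclk), AMDGPU_PP_SENSOR_UVD_VCLK * 4);
 *
 * where AMDGPU_PP_SENSOR_UVD_VCLK comes from the 'amd_pp_sensors'
 * enumeration mentioned above.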
 */
static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
					  size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int idx, x, outsize, r, valuesize;
	uint32_t values[16];

	if (size & 3 || *pos & 0x3)
		return -EINVAL;

	if (!adev->pm.dpm_enabled)
		return -EINVAL;

	/* convert offset to sensor number */
	idx = *pos >> 2;

	valuesize = sizeof(values);
	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
	if (r)
		return r;

	if (size > valuesize)
		return -EINVAL;

	outsize = 0;
	x = 0;
	if (!r) {
		while (size) {
			r = put_user(values[x++], (int32_t *)buf);
			buf += 4;
			size -= 4;
			outsize += 4;
		}
	}

	return !r ? outsize : r;
}

/**
 * amdgpu_debugfs_wave_read - Read WAVE STATUS data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The offset being sought selects the wave for which the status data
 * will be returned. The bits are used as follows:
 *
 * Bits 0..6:	Byte offset into data
 * Bits 7..14:	SE selector
 * Bits 15..22:	SH/SA selector
 * Bits 23..30: CU/{WGP+SIMD} selector
 * Bits 31..36: WAVE ID selector
 * Bits 37..44: SIMD ID selector
 *
 * The returned data begins with one DWORD of version information,
 * followed by the WAVE STATUS registers relevant to the GFX IP version
 * being used. See gfx_v8_0_read_wave_data() for an example output.
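 *
 * As an illustrative example, to read the status of wave 2 on SIMD 0
 * of CU 1 (SE 0, SH 0), user space would seek to
 *
 *   (1ULL << 23) | (2ULL << 31)
 *
 * i.e. CU selector = 1 and WAVE ID = 2, with the byte offset and the
 * remaining selectors left at zero.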
 */
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int r, x;
	ssize_t result = 0;
	uint32_t offset, se, sh, cu, wave, simd, data[32];

	if (size & 3 || *pos & 3)
		return -EINVAL;

	/* decode offset */
	offset = (*pos & GENMASK_ULL(6, 0));
	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;

	/* switch to the specific se/sh/cu */
	mutex_lock(&adev->grbm_idx_mutex);
	amdgpu_gfx_select_se_sh(adev, se, sh, cu);

	x = 0;
	if (adev->gfx.funcs->read_wave_data)
		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);

	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (!x)
		return -EINVAL;

	while (size && (offset < x * 4)) {
		uint32_t value;

		value = data[offset >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		offset += 4;
		size -= 4;
	}

	return result;
}

/**
 * amdgpu_debugfs_gpr_read - Read wave GPRs
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos: Offset to seek to
 *
 * The offset being sought selects the wave for which the GPR data
 * will be returned. The bits are used as follows:
 *
 * Bits 0..11:	Byte offset into data
 * Bits 12..19:	SE selector
 * Bits 20..27:	SH/SA selector
 * Bits 28..35: CU/{WGP+SIMD} selector
 * Bits 36..43: WAVE ID selector
 * Bits 44..51: SIMD ID selector
 * Bits 52..59: Thread selector
 * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
 *
 * The return data comes from the SGPR or VGPR register bank for
 * the selected operational unit.
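 *
 * As an illustrative example, to read the SGPRs of wave 0 on SIMD 1
 * (SE 0, SH 0, CU 0, thread 0), user space would seek to
 *
 *   (1ULL << 60) | (1ULL << 44)
 *
 * i.e. bank = 1 (SGPR) and SIMD ID = 1, with the byte offset and the
 * remaining selectors left at zero.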
 */
static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
				       size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int r;
	ssize_t result = 0;
	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;

	/* data[] holds 1024 DWORDs, so reject requests that would overrun it */
	if (size > 4096 || size & 3 || *pos & 3)
		return -EINVAL;

	/* decode offset (the byte offset is converted to a DWORD index) */
	offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;

	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	/* switch to the specific se/sh/cu */
	mutex_lock(&adev->grbm_idx_mutex);
	amdgpu_gfx_select_se_sh(adev, se, sh, cu);

	if (bank == 0) {
		if (adev->gfx.funcs->read_wave_vgprs)
			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size >> 2, data);
	} else {
		if (adev->gfx.funcs->read_wave_sgprs)
			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size >> 2, data);
	}

	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	mutex_unlock(&adev->grbm_idx_mutex);

	while (size) {
		uint32_t value;

		/* the read_wave_*gprs callbacks fill data[] from index 0 */
		value = data[result >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			result = r;
			goto err;
		}

		result += 4;
		buf += 4;
		size -= 4;
	}

err:
	kfree(data);
	return result;
}

static const struct file_operations amdgpu_debugfs_regs_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_read,
	.write = amdgpu_debugfs_regs_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_didt_read,
	.write = amdgpu_debugfs_regs_didt_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_pcie_read,
	.write = amdgpu_debugfs_regs_pcie_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_smc_read,
	.write = amdgpu_debugfs_regs_smc_write,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_gca_config_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gca_config_read,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_sensors_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_sensor_read,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_wave_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_wave_read,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_gpr_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gpr_read,
	.llseek = default_llseek
};

static const struct file_operations *debugfs_regs[] = {
	&amdgpu_debugfs_regs_fops,
	&amdgpu_debugfs_regs_didt_fops,
	&amdgpu_debugfs_regs_pcie_fops,
	&amdgpu_debugfs_regs_smc_fops,
	&amdgpu_debugfs_gca_config_fops,
	&amdgpu_debugfs_sensors_fops,
	&amdgpu_debugfs_wave_fops,
	&amdgpu_debugfs_gpr_fops,
};

static const char *debugfs_regs_names[] = {
	"amdgpu_regs",
	"amdgpu_regs_didt",
	"amdgpu_regs_pcie",
	"amdgpu_regs_smc",
	"amdgpu_gca_config",
	"amdgpu_sensors",
	"amdgpu_wave",
	"amdgpu_gpr",
};

/**
 * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
 *			      register access.
 *
 * @adev: The device to attach the debugfs entries to
 */
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
		ent = debugfs_create_file(debugfs_regs_names[i],
					  S_IFREG | S_IRUGO, root,
					  adev, debugfs_regs[i]);
		if (!i && !IS_ERR_OR_NULL(ent))
			i_size_write(ent->d_inode, adev->rmmio_size);
		adev->debugfs_regs[i] = ent;
	}

	return 0;
}

void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
		if (adev->debugfs_regs[i]) {
			debugfs_remove(adev->debugfs_regs[i]);
			adev->debugfs_regs[i] = NULL;
		}
	}
}

static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int r = 0, i;

	/* hold on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		kthread_park(ring->sched.thread);
	}

	seq_puts(m, "run ib test:\n");
	r = amdgpu_ib_ring_tests(adev);
	if (r)
		seq_printf(m, "ib ring tests failed (%d).\n", r);
	else
		seq_puts(m, "ib ring tests passed.\n");

	/* go on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		kthread_unpark(ring->sched.thread);
	}

	return 0;
}

static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;

	seq_write(m, adev->bios, adev->bios_size);
	return 0;
}

static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;

	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
	return 0;
}

static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;

	seq_printf(m, "(%d)\n",
		   ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
	return 0;
}

static const struct drm_info_list amdgpu_debugfs_list[] = {
	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
	{"amdgpu_test_ib", amdgpu_debugfs_test_ib},
	{"amdgpu_evict_vram", amdgpu_debugfs_evict_vram},
	{"amdgpu_evict_gtt", amdgpu_debugfs_evict_gtt},
};

static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
					  struct dma_fence **fences)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	uint32_t sync_seq, last_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = ring->fence_drv.sync_seq;

	last_seq &= drv->num_fences_mask;
	sync_seq &= drv->num_fences_mask;

	do {
		struct dma_fence *fence, **ptr;

		++last_seq;
		last_seq &= drv->num_fences_mask;
		ptr = &drv->fences[last_seq];

		fence = rcu_dereference_protected(*ptr, 1);
		RCU_INIT_POINTER(*ptr, NULL);

		if (!fence)
			continue;

		fences[last_seq] = fence;

	} while (last_seq != sync_seq);
}

static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
					    int length)
{
	int i;
	struct dma_fence *fence;

	for (i = 0; i < length; i++) {
		fence = fences[i];
		if (!fence)
			continue;
		dma_fence_signal(fence);
		dma_fence_put(fence);
	}
}

static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job;
	struct dma_fence *fence;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		fence = sched->ops->run_job(s_job);
		dma_fence_put(fence);
	}
	spin_unlock(&sched->job_list_lock);
}

static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
{
	struct amdgpu_job *job;
	struct drm_sched_job *s_job;
	uint32_t preempt_seq;
	struct dma_fence *fence, **ptr;
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct drm_gpu_scheduler *sched = &ring->sched;

	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
		return;

	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
	if (preempt_seq <= atomic_read(&drv->last_seq))
		return;

	preempt_seq &= drv->num_fences_mask;
	ptr = &drv->fences[preempt_seq];
	fence = rcu_dereference_protected(*ptr, 1);

	spin_lock(&sched->job_list_lock);
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		job = to_amdgpu_job(s_job);
		if (job->fence == fence)
			/* mark the job as preempted */
			job->preemption_status |= AMDGPU_IB_PREEMPTED;
	}
	spin_unlock(&sched->job_list_lock);
}

static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
	int r, resched, length;
	struct amdgpu_ring *ring;
	struct dma_fence **fences = NULL;
	struct amdgpu_device *adev = (struct amdgpu_device *)data;

	if (val >= AMDGPU_MAX_RINGS)
		return -EINVAL;

	ring = adev->rings[val];

	if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
		return -EINVAL;

	/* the last preemption failed */
	if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
		return -EBUSY;

	length = ring->fence_drv.num_fences_mask + 1;
	fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	/* stop the scheduler */
	kthread_park(ring->sched.thread);

	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

	/* preempt the IB */
	r = amdgpu_ring_preempt_ib(ring);
	if (r) {
		DRM_WARN("failed to preempt ring %d\n", ring->idx);
		goto failure;
	}

	amdgpu_fence_process(ring);

	if (atomic_read(&ring->fence_drv.last_seq) !=
	    ring->fence_drv.sync_seq) {
		DRM_INFO("ring %d was preempted\n", ring->idx);

		amdgpu_ib_preempt_mark_partial_job(ring);

		/* swap out the old fences */
		amdgpu_ib_preempt_fences_swap(ring, fences);

		amdgpu_fence_driver_force_completion(ring);

		/* resubmit unfinished jobs */
		amdgpu_ib_preempt_job_recovery(&ring->sched);

		/* wait for the resubmitted jobs to finish */
		amdgpu_fence_wait_empty(ring);

		/* signal the old fences */
		amdgpu_ib_preempt_signal_fences(fences, length);
	}

failure:
	/* restart the scheduler */
	kthread_unpark(ring->sched.thread);

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

	kfree(fences);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
			amdgpu_debugfs_ib_preempt, "%llu\n");

int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	adev->debugfs_preempt =
		debugfs_create_file("amdgpu_preempt_ib", 0600,
				    adev->ddev->primary->debugfs_root,
				    (void *)adev, &fops_ib_preempt);
	if (!(adev->debugfs_preempt)) {
		DRM_ERROR("unable to create amdgpu_preempt_ib debugfs file\n");
		return -EIO;
	}

	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
					ARRAY_SIZE(amdgpu_debugfs_list));
}

void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
	if (adev->debugfs_preempt)
		debugfs_remove(adev->debugfs_preempt);
}

#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	return 0;
}
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { }
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	return 0;
}
void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
#endif