1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/power_supply.h> 29 #include <linux/kthread.h> 30 #include <linux/module.h> 31 #include <linux/console.h> 32 #include <linux/slab.h> 33 #include <linux/iommu.h> 34 #include <linux/pci.h> 35 #include <linux/devcoredump.h> 36 #include <generated/utsrelease.h> 37 #include <linux/pci-p2pdma.h> 38 #include <linux/apple-gmux.h> 39 40 #include <drm/drm_aperture.h> 41 #include <drm/drm_atomic_helper.h> 42 #include <drm/drm_crtc_helper.h> 43 #include <drm/drm_fb_helper.h> 44 #include <drm/drm_probe_helper.h> 45 #include <drm/amdgpu_drm.h> 46 #include <linux/vgaarb.h> 47 #include <linux/vga_switcheroo.h> 48 #include <linux/efi.h> 49 #include "amdgpu.h" 50 #include "amdgpu_trace.h" 51 #include "amdgpu_i2c.h" 52 #include "atom.h" 53 #include "amdgpu_atombios.h" 54 #include "amdgpu_atomfirmware.h" 55 #include "amd_pcie.h" 56 #ifdef CONFIG_DRM_AMDGPU_SI 57 #include "si.h" 58 #endif 59 #ifdef CONFIG_DRM_AMDGPU_CIK 60 #include "cik.h" 61 #endif 62 #include "vi.h" 63 #include "soc15.h" 64 #include "nv.h" 65 #include "bif/bif_4_1_d.h" 66 #include <linux/firmware.h> 67 #include "amdgpu_vf_error.h" 68 69 #include "amdgpu_amdkfd.h" 70 #include "amdgpu_pm.h" 71 72 #include "amdgpu_xgmi.h" 73 #include "amdgpu_ras.h" 74 #include "amdgpu_pmu.h" 75 #include "amdgpu_fru_eeprom.h" 76 #include "amdgpu_reset.h" 77 78 #include <linux/suspend.h> 79 #include <drm/task_barrier.h> 80 #include <linux/pm_runtime.h> 81 82 #include <drm/drm_drv.h> 83 84 #if IS_ENABLED(CONFIG_X86) 85 #include <asm/intel-family.h> 86 #endif 87 88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); 90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); 92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); 93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); 94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); 95 96 #define AMDGPU_RESUME_MS 2000 97 #define AMDGPU_MAX_RETRY_LIMIT 2 98 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) 99 100 static const struct drm_driver amdgpu_kms_driver; 101 102 const char *amdgpu_asic_name[] = { 103 "TAHITI", 104 "PITCAIRN", 105 "VERDE", 106 "OLAND", 107 "HAINAN", 108 "BONAIRE", 109 "KAVERI", 110 "KABINI", 111 "HAWAII", 112 "MULLINS", 113 "TOPAZ", 114 "TONGA", 115 "FIJI", 116 "CARRIZO", 117 "STONEY", 118 "POLARIS10", 119 "POLARIS11", 120 "POLARIS12", 121 "VEGAM", 122 "VEGA10", 123 "VEGA12", 124 "VEGA20", 125 "RAVEN", 126 "ARCTURUS", 127 "RENOIR", 128 "ALDEBARAN", 129 "NAVI10", 130 "CYAN_SKILLFISH", 131 "NAVI14", 132 "NAVI12", 133 "SIENNA_CICHLID", 134 "NAVY_FLOUNDER", 135 "VANGOGH", 136 "DIMGREY_CAVEFISH", 137 "BEIGE_GOBY", 138 "YELLOW_CARP", 139 "IP DISCOVERY", 140 "LAST", 141 }; 142 143 /** 144 * DOC: pcie_replay_count 145 * 146 * The amdgpu driver provides a sysfs API for reporting the total number 147 * of PCIe replays (NAKs) 148 * The file pcie_replay_count is used for this and returns the total 149 * number of replays as a sum of the NAKs generated and NAKs received 150 */ 151 152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, 153 struct device_attribute *attr, char *buf) 154 { 155 struct drm_device *ddev = dev_get_drvdata(dev); 156 struct amdgpu_device *adev = drm_to_adev(ddev); 157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); 158 159 return sysfs_emit(buf, "%llu\n", cnt); 160 } 161 162 static DEVICE_ATTR(pcie_replay_count, S_IRUGO, 163 amdgpu_device_get_pcie_replay_count, NULL); 164 165 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); 166 167 /** 168 * DOC: product_name 169 * 170 * The amdgpu driver provides a sysfs API for reporting the product name 171 * for the device 172 * The file product_name is used for this and returns the product name 173 * as returned from the FRU. 174 * NOTE: This is only available for certain server cards 175 */ 176 177 static ssize_t amdgpu_device_get_product_name(struct device *dev, 178 struct device_attribute *attr, char *buf) 179 { 180 struct drm_device *ddev = dev_get_drvdata(dev); 181 struct amdgpu_device *adev = drm_to_adev(ddev); 182 183 return sysfs_emit(buf, "%s\n", adev->product_name); 184 } 185 186 static DEVICE_ATTR(product_name, S_IRUGO, 187 amdgpu_device_get_product_name, NULL); 188 189 /** 190 * DOC: product_number 191 * 192 * The amdgpu driver provides a sysfs API for reporting the part number 193 * for the device 194 * The file product_number is used for this and returns the part number 195 * as returned from the FRU. 196 * NOTE: This is only available for certain server cards 197 */ 198 199 static ssize_t amdgpu_device_get_product_number(struct device *dev, 200 struct device_attribute *attr, char *buf) 201 { 202 struct drm_device *ddev = dev_get_drvdata(dev); 203 struct amdgpu_device *adev = drm_to_adev(ddev); 204 205 return sysfs_emit(buf, "%s\n", adev->product_number); 206 } 207 208 static DEVICE_ATTR(product_number, S_IRUGO, 209 amdgpu_device_get_product_number, NULL); 210 211 /** 212 * DOC: serial_number 213 * 214 * The amdgpu driver provides a sysfs API for reporting the serial number 215 * for the device 216 * The file serial_number is used for this and returns the serial number 217 * as returned from the FRU. 218 * NOTE: This is only available for certain server cards 219 */ 220 221 static ssize_t amdgpu_device_get_serial_number(struct device *dev, 222 struct device_attribute *attr, char *buf) 223 { 224 struct drm_device *ddev = dev_get_drvdata(dev); 225 struct amdgpu_device *adev = drm_to_adev(ddev); 226 227 return sysfs_emit(buf, "%s\n", adev->serial); 228 } 229 230 static DEVICE_ATTR(serial_number, S_IRUGO, 231 amdgpu_device_get_serial_number, NULL); 232 233 /** 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control 235 * 236 * @dev: drm_device pointer 237 * 238 * Returns true if the device is a dGPU with ATPX power control, 239 * otherwise return false. 240 */ 241 bool amdgpu_device_supports_px(struct drm_device *dev) 242 { 243 struct amdgpu_device *adev = drm_to_adev(dev); 244 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) 246 return true; 247 return false; 248 } 249 250 /** 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources 252 * 253 * @dev: drm_device pointer 254 * 255 * Returns true if the device is a dGPU with ACPI power control, 256 * otherwise return false. 257 */ 258 bool amdgpu_device_supports_boco(struct drm_device *dev) 259 { 260 struct amdgpu_device *adev = drm_to_adev(dev); 261 262 if (adev->has_pr3 || 263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) 264 return true; 265 return false; 266 } 267 268 /** 269 * amdgpu_device_supports_baco - Does the device support BACO 270 * 271 * @dev: drm_device pointer 272 * 273 * Returns true if the device supporte BACO, 274 * otherwise return false. 275 */ 276 bool amdgpu_device_supports_baco(struct drm_device *dev) 277 { 278 struct amdgpu_device *adev = drm_to_adev(dev); 279 280 return amdgpu_asic_supports_baco(adev); 281 } 282 283 /** 284 * amdgpu_device_supports_smart_shift - Is the device dGPU with 285 * smart shift support 286 * 287 * @dev: drm_device pointer 288 * 289 * Returns true if the device is a dGPU with Smart Shift support, 290 * otherwise returns false. 291 */ 292 bool amdgpu_device_supports_smart_shift(struct drm_device *dev) 293 { 294 return (amdgpu_device_supports_boco(dev) && 295 amdgpu_acpi_is_power_shift_control_supported()); 296 } 297 298 /* 299 * VRAM access helper functions 300 */ 301 302 /** 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA 304 * 305 * @adev: amdgpu_device pointer 306 * @pos: offset of the buffer in vram 307 * @buf: virtual address of the buffer in system memory 308 * @size: read/write size, sizeof(@buf) must > @size 309 * @write: true - write to vram, otherwise - read from vram 310 */ 311 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, 312 void *buf, size_t size, bool write) 313 { 314 unsigned long flags; 315 uint32_t hi = ~0, tmp = 0; 316 uint32_t *data = buf; 317 uint64_t last; 318 int idx; 319 320 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 321 return; 322 323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)); 324 325 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 326 for (last = pos + size; pos < last; pos += 4) { 327 tmp = pos >> 31; 328 329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); 330 if (tmp != hi) { 331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); 332 hi = tmp; 333 } 334 if (write) 335 WREG32_NO_KIQ(mmMM_DATA, *data++); 336 else 337 *data++ = RREG32_NO_KIQ(mmMM_DATA); 338 } 339 340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 341 drm_dev_exit(idx); 342 } 343 344 /** 345 * amdgpu_device_aper_access - access vram by vram aperature 346 * 347 * @adev: amdgpu_device pointer 348 * @pos: offset of the buffer in vram 349 * @buf: virtual address of the buffer in system memory 350 * @size: read/write size, sizeof(@buf) must > @size 351 * @write: true - write to vram, otherwise - read from vram 352 * 353 * The return value means how many bytes have been transferred. 354 */ 355 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, 356 void *buf, size_t size, bool write) 357 { 358 #ifdef CONFIG_64BIT 359 void __iomem *addr; 360 size_t count = 0; 361 uint64_t last; 362 363 if (!adev->mman.aper_base_kaddr) 364 return 0; 365 366 last = min(pos + size, adev->gmc.visible_vram_size); 367 if (last > pos) { 368 addr = adev->mman.aper_base_kaddr + pos; 369 count = last - pos; 370 371 if (write) { 372 memcpy_toio(addr, buf, count); 373 mb(); 374 amdgpu_device_flush_hdp(adev, NULL); 375 } else { 376 amdgpu_device_invalidate_hdp(adev, NULL); 377 mb(); 378 memcpy_fromio(buf, addr, count); 379 } 380 381 } 382 383 return count; 384 #else 385 return 0; 386 #endif 387 } 388 389 /** 390 * amdgpu_device_vram_access - read/write a buffer in vram 391 * 392 * @adev: amdgpu_device pointer 393 * @pos: offset of the buffer in vram 394 * @buf: virtual address of the buffer in system memory 395 * @size: read/write size, sizeof(@buf) must > @size 396 * @write: true - write to vram, otherwise - read from vram 397 */ 398 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, 399 void *buf, size_t size, bool write) 400 { 401 size_t count; 402 403 /* try to using vram apreature to access vram first */ 404 count = amdgpu_device_aper_access(adev, pos, buf, size, write); 405 size -= count; 406 if (size) { 407 /* using MM to access rest vram */ 408 pos += count; 409 buf += count; 410 amdgpu_device_mm_access(adev, pos, buf, size, write); 411 } 412 } 413 414 /* 415 * register access helper functions. 416 */ 417 418 /* Check if hw access should be skipped because of hotplug or device error */ 419 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) 420 { 421 if (adev->no_hw_access) 422 return true; 423 424 #ifdef CONFIG_LOCKDEP 425 /* 426 * This is a bit complicated to understand, so worth a comment. What we assert 427 * here is that the GPU reset is not running on another thread in parallel. 428 * 429 * For this we trylock the read side of the reset semaphore, if that succeeds 430 * we know that the reset is not running in paralell. 431 * 432 * If the trylock fails we assert that we are either already holding the read 433 * side of the lock or are the reset thread itself and hold the write side of 434 * the lock. 435 */ 436 if (in_task()) { 437 if (down_read_trylock(&adev->reset_domain->sem)) 438 up_read(&adev->reset_domain->sem); 439 else 440 lockdep_assert_held(&adev->reset_domain->sem); 441 } 442 #endif 443 return false; 444 } 445 446 /** 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register 448 * 449 * @adev: amdgpu_device pointer 450 * @reg: dword aligned register offset 451 * @acc_flags: access flags which require special behavior 452 * 453 * Returns the 32 bit value from the offset specified. 454 */ 455 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, 456 uint32_t reg, uint32_t acc_flags) 457 { 458 uint32_t ret; 459 460 if (amdgpu_device_skip_hw_access(adev)) 461 return 0; 462 463 if ((reg * 4) < adev->rmmio_size) { 464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && 465 amdgpu_sriov_runtime(adev) && 466 down_read_trylock(&adev->reset_domain->sem)) { 467 ret = amdgpu_kiq_rreg(adev, reg); 468 up_read(&adev->reset_domain->sem); 469 } else { 470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); 471 } 472 } else { 473 ret = adev->pcie_rreg(adev, reg * 4); 474 } 475 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); 477 478 return ret; 479 } 480 481 /* 482 * MMIO register read with bytes helper functions 483 * @offset:bytes offset from MMIO start 484 * 485 */ 486 487 /** 488 * amdgpu_mm_rreg8 - read a memory mapped IO register 489 * 490 * @adev: amdgpu_device pointer 491 * @offset: byte aligned register offset 492 * 493 * Returns the 8 bit value from the offset specified. 494 */ 495 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) 496 { 497 if (amdgpu_device_skip_hw_access(adev)) 498 return 0; 499 500 if (offset < adev->rmmio_size) 501 return (readb(adev->rmmio + offset)); 502 BUG(); 503 } 504 505 /* 506 * MMIO register write with bytes helper functions 507 * @offset:bytes offset from MMIO start 508 * @value: the value want to be written to the register 509 * 510 */ 511 /** 512 * amdgpu_mm_wreg8 - read a memory mapped IO register 513 * 514 * @adev: amdgpu_device pointer 515 * @offset: byte aligned register offset 516 * @value: 8 bit value to write 517 * 518 * Writes the value specified to the offset specified. 519 */ 520 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) 521 { 522 if (amdgpu_device_skip_hw_access(adev)) 523 return; 524 525 if (offset < adev->rmmio_size) 526 writeb(value, adev->rmmio + offset); 527 else 528 BUG(); 529 } 530 531 /** 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register 533 * 534 * @adev: amdgpu_device pointer 535 * @reg: dword aligned register offset 536 * @v: 32 bit value to write to the register 537 * @acc_flags: access flags which require special behavior 538 * 539 * Writes the value specified to the offset specified. 540 */ 541 void amdgpu_device_wreg(struct amdgpu_device *adev, 542 uint32_t reg, uint32_t v, 543 uint32_t acc_flags) 544 { 545 if (amdgpu_device_skip_hw_access(adev)) 546 return; 547 548 if ((reg * 4) < adev->rmmio_size) { 549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && 550 amdgpu_sriov_runtime(adev) && 551 down_read_trylock(&adev->reset_domain->sem)) { 552 amdgpu_kiq_wreg(adev, reg, v); 553 up_read(&adev->reset_domain->sem); 554 } else { 555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 556 } 557 } else { 558 adev->pcie_wreg(adev, reg * 4, v); 559 } 560 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v); 562 } 563 564 /** 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range 566 * 567 * @adev: amdgpu_device pointer 568 * @reg: mmio/rlc register 569 * @v: value to write 570 * 571 * this function is invoked only for the debugfs register access 572 */ 573 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, 574 uint32_t reg, uint32_t v) 575 { 576 if (amdgpu_device_skip_hw_access(adev)) 577 return; 578 579 if (amdgpu_sriov_fullaccess(adev) && 580 adev->gfx.rlc.funcs && 581 adev->gfx.rlc.funcs->is_rlcg_access_range) { 582 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) 583 return amdgpu_sriov_wreg(adev, reg, v, 0, 0); 584 } else if ((reg * 4) >= adev->rmmio_size) { 585 adev->pcie_wreg(adev, reg * 4, v); 586 } else { 587 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 588 } 589 } 590 591 /** 592 * amdgpu_mm_rdoorbell - read a doorbell dword 593 * 594 * @adev: amdgpu_device pointer 595 * @index: doorbell index 596 * 597 * Returns the value in the doorbell aperture at the 598 * requested doorbell index (CIK). 599 */ 600 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) 601 { 602 if (amdgpu_device_skip_hw_access(adev)) 603 return 0; 604 605 if (index < adev->doorbell.num_kernel_doorbells) { 606 return readl(adev->doorbell.ptr + index); 607 } else { 608 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 609 return 0; 610 } 611 } 612 613 /** 614 * amdgpu_mm_wdoorbell - write a doorbell dword 615 * 616 * @adev: amdgpu_device pointer 617 * @index: doorbell index 618 * @v: value to write 619 * 620 * Writes @v to the doorbell aperture at the 621 * requested doorbell index (CIK). 622 */ 623 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) 624 { 625 if (amdgpu_device_skip_hw_access(adev)) 626 return; 627 628 if (index < adev->doorbell.num_kernel_doorbells) { 629 writel(v, adev->doorbell.ptr + index); 630 } else { 631 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 632 } 633 } 634 635 /** 636 * amdgpu_mm_rdoorbell64 - read a doorbell Qword 637 * 638 * @adev: amdgpu_device pointer 639 * @index: doorbell index 640 * 641 * Returns the value in the doorbell aperture at the 642 * requested doorbell index (VEGA10+). 643 */ 644 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) 645 { 646 if (amdgpu_device_skip_hw_access(adev)) 647 return 0; 648 649 if (index < adev->doorbell.num_kernel_doorbells) { 650 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); 651 } else { 652 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 653 return 0; 654 } 655 } 656 657 /** 658 * amdgpu_mm_wdoorbell64 - write a doorbell Qword 659 * 660 * @adev: amdgpu_device pointer 661 * @index: doorbell index 662 * @v: value to write 663 * 664 * Writes @v to the doorbell aperture at the 665 * requested doorbell index (VEGA10+). 666 */ 667 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) 668 { 669 if (amdgpu_device_skip_hw_access(adev)) 670 return; 671 672 if (index < adev->doorbell.num_kernel_doorbells) { 673 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); 674 } else { 675 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 676 } 677 } 678 679 /** 680 * amdgpu_device_indirect_rreg - read an indirect register 681 * 682 * @adev: amdgpu_device pointer 683 * @reg_addr: indirect register address to read from 684 * 685 * Returns the value of indirect register @reg_addr 686 */ 687 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, 688 u32 reg_addr) 689 { 690 unsigned long flags, pcie_index, pcie_data; 691 void __iomem *pcie_index_offset; 692 void __iomem *pcie_data_offset; 693 u32 r; 694 695 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 696 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 697 698 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 701 702 writel(reg_addr, pcie_index_offset); 703 readl(pcie_index_offset); 704 r = readl(pcie_data_offset); 705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 706 707 return r; 708 } 709 710 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, 711 u64 reg_addr) 712 { 713 unsigned long flags, pcie_index, pcie_index_hi, pcie_data; 714 u32 r; 715 void __iomem *pcie_index_offset; 716 void __iomem *pcie_index_hi_offset; 717 void __iomem *pcie_data_offset; 718 719 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 720 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 721 if (adev->nbio.funcs->get_pcie_index_hi_offset) 722 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); 723 else 724 pcie_index_hi = 0; 725 726 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 727 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 728 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 729 if (pcie_index_hi != 0) 730 pcie_index_hi_offset = (void __iomem *)adev->rmmio + 731 pcie_index_hi * 4; 732 733 writel(reg_addr, pcie_index_offset); 734 readl(pcie_index_offset); 735 if (pcie_index_hi != 0) { 736 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); 737 readl(pcie_index_hi_offset); 738 } 739 r = readl(pcie_data_offset); 740 741 /* clear the high bits */ 742 if (pcie_index_hi != 0) { 743 writel(0, pcie_index_hi_offset); 744 readl(pcie_index_hi_offset); 745 } 746 747 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 748 749 return r; 750 } 751 752 /** 753 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register 754 * 755 * @adev: amdgpu_device pointer 756 * @reg_addr: indirect register address to read from 757 * 758 * Returns the value of indirect register @reg_addr 759 */ 760 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, 761 u32 reg_addr) 762 { 763 unsigned long flags, pcie_index, pcie_data; 764 void __iomem *pcie_index_offset; 765 void __iomem *pcie_data_offset; 766 u64 r; 767 768 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 769 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 770 771 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 772 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 773 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 774 775 /* read low 32 bits */ 776 writel(reg_addr, pcie_index_offset); 777 readl(pcie_index_offset); 778 r = readl(pcie_data_offset); 779 /* read high 32 bits */ 780 writel(reg_addr + 4, pcie_index_offset); 781 readl(pcie_index_offset); 782 r |= ((u64)readl(pcie_data_offset) << 32); 783 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 784 785 return r; 786 } 787 788 /** 789 * amdgpu_device_indirect_wreg - write an indirect register address 790 * 791 * @adev: amdgpu_device pointer 792 * @pcie_index: mmio register offset 793 * @pcie_data: mmio register offset 794 * @reg_addr: indirect register offset 795 * @reg_data: indirect register data 796 * 797 */ 798 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, 799 u32 reg_addr, u32 reg_data) 800 { 801 unsigned long flags, pcie_index, pcie_data; 802 void __iomem *pcie_index_offset; 803 void __iomem *pcie_data_offset; 804 805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 807 808 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 809 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 810 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 811 812 writel(reg_addr, pcie_index_offset); 813 readl(pcie_index_offset); 814 writel(reg_data, pcie_data_offset); 815 readl(pcie_data_offset); 816 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 817 } 818 819 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, 820 u64 reg_addr, u32 reg_data) 821 { 822 unsigned long flags, pcie_index, pcie_index_hi, pcie_data; 823 void __iomem *pcie_index_offset; 824 void __iomem *pcie_index_hi_offset; 825 void __iomem *pcie_data_offset; 826 827 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 828 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 829 if (adev->nbio.funcs->get_pcie_index_hi_offset) 830 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); 831 else 832 pcie_index_hi = 0; 833 834 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 835 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 836 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 837 if (pcie_index_hi != 0) 838 pcie_index_hi_offset = (void __iomem *)adev->rmmio + 839 pcie_index_hi * 4; 840 841 writel(reg_addr, pcie_index_offset); 842 readl(pcie_index_offset); 843 if (pcie_index_hi != 0) { 844 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); 845 readl(pcie_index_hi_offset); 846 } 847 writel(reg_data, pcie_data_offset); 848 readl(pcie_data_offset); 849 850 /* clear the high bits */ 851 if (pcie_index_hi != 0) { 852 writel(0, pcie_index_hi_offset); 853 readl(pcie_index_hi_offset); 854 } 855 856 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 857 } 858 859 /** 860 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address 861 * 862 * @adev: amdgpu_device pointer 863 * @pcie_index: mmio register offset 864 * @pcie_data: mmio register offset 865 * @reg_addr: indirect register offset 866 * @reg_data: indirect register data 867 * 868 */ 869 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, 870 u32 reg_addr, u64 reg_data) 871 { 872 unsigned long flags, pcie_index, pcie_data; 873 void __iomem *pcie_index_offset; 874 void __iomem *pcie_data_offset; 875 876 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 877 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 878 879 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 880 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 881 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 882 883 /* write low 32 bits */ 884 writel(reg_addr, pcie_index_offset); 885 readl(pcie_index_offset); 886 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); 887 readl(pcie_data_offset); 888 /* write high 32 bits */ 889 writel(reg_addr + 4, pcie_index_offset); 890 readl(pcie_index_offset); 891 writel((u32)(reg_data >> 32), pcie_data_offset); 892 readl(pcie_data_offset); 893 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 894 } 895 896 /** 897 * amdgpu_device_get_rev_id - query device rev_id 898 * 899 * @adev: amdgpu_device pointer 900 * 901 * Return device rev_id 902 */ 903 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) 904 { 905 return adev->nbio.funcs->get_rev_id(adev); 906 } 907 908 /** 909 * amdgpu_invalid_rreg - dummy reg read function 910 * 911 * @adev: amdgpu_device pointer 912 * @reg: offset of register 913 * 914 * Dummy register read function. Used for register blocks 915 * that certain asics don't have (all asics). 916 * Returns the value in the register. 917 */ 918 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) 919 { 920 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); 921 BUG(); 922 return 0; 923 } 924 925 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) 926 { 927 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); 928 BUG(); 929 return 0; 930 } 931 932 /** 933 * amdgpu_invalid_wreg - dummy reg write function 934 * 935 * @adev: amdgpu_device pointer 936 * @reg: offset of register 937 * @v: value to write to the register 938 * 939 * Dummy register read function. Used for register blocks 940 * that certain asics don't have (all asics). 941 */ 942 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 943 { 944 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", 945 reg, v); 946 BUG(); 947 } 948 949 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) 950 { 951 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", 952 reg, v); 953 BUG(); 954 } 955 956 /** 957 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function 958 * 959 * @adev: amdgpu_device pointer 960 * @reg: offset of register 961 * 962 * Dummy register read function. Used for register blocks 963 * that certain asics don't have (all asics). 964 * Returns the value in the register. 965 */ 966 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) 967 { 968 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); 969 BUG(); 970 return 0; 971 } 972 973 /** 974 * amdgpu_invalid_wreg64 - dummy reg write function 975 * 976 * @adev: amdgpu_device pointer 977 * @reg: offset of register 978 * @v: value to write to the register 979 * 980 * Dummy register read function. Used for register blocks 981 * that certain asics don't have (all asics). 982 */ 983 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) 984 { 985 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", 986 reg, v); 987 BUG(); 988 } 989 990 /** 991 * amdgpu_block_invalid_rreg - dummy reg read function 992 * 993 * @adev: amdgpu_device pointer 994 * @block: offset of instance 995 * @reg: offset of register 996 * 997 * Dummy register read function. Used for register blocks 998 * that certain asics don't have (all asics). 999 * Returns the value in the register. 1000 */ 1001 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, 1002 uint32_t block, uint32_t reg) 1003 { 1004 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n", 1005 reg, block); 1006 BUG(); 1007 return 0; 1008 } 1009 1010 /** 1011 * amdgpu_block_invalid_wreg - dummy reg write function 1012 * 1013 * @adev: amdgpu_device pointer 1014 * @block: offset of instance 1015 * @reg: offset of register 1016 * @v: value to write to the register 1017 * 1018 * Dummy register read function. Used for register blocks 1019 * that certain asics don't have (all asics). 1020 */ 1021 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, 1022 uint32_t block, 1023 uint32_t reg, uint32_t v) 1024 { 1025 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", 1026 reg, block, v); 1027 BUG(); 1028 } 1029 1030 /** 1031 * amdgpu_device_asic_init - Wrapper for atom asic_init 1032 * 1033 * @adev: amdgpu_device pointer 1034 * 1035 * Does any asic specific work and then calls atom asic init. 1036 */ 1037 static int amdgpu_device_asic_init(struct amdgpu_device *adev) 1038 { 1039 amdgpu_asic_pre_asic_init(adev); 1040 1041 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) || 1042 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) 1043 return amdgpu_atomfirmware_asic_init(adev, true); 1044 else 1045 return amdgpu_atom_asic_init(adev->mode_info.atom_context); 1046 } 1047 1048 /** 1049 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page 1050 * 1051 * @adev: amdgpu_device pointer 1052 * 1053 * Allocates a scratch page of VRAM for use by various things in the 1054 * driver. 1055 */ 1056 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev) 1057 { 1058 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, 1059 AMDGPU_GEM_DOMAIN_VRAM | 1060 AMDGPU_GEM_DOMAIN_GTT, 1061 &adev->mem_scratch.robj, 1062 &adev->mem_scratch.gpu_addr, 1063 (void **)&adev->mem_scratch.ptr); 1064 } 1065 1066 /** 1067 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page 1068 * 1069 * @adev: amdgpu_device pointer 1070 * 1071 * Frees the VRAM scratch page. 1072 */ 1073 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev) 1074 { 1075 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL); 1076 } 1077 1078 /** 1079 * amdgpu_device_program_register_sequence - program an array of registers. 1080 * 1081 * @adev: amdgpu_device pointer 1082 * @registers: pointer to the register array 1083 * @array_size: size of the register array 1084 * 1085 * Programs an array or registers with and and or masks. 1086 * This is a helper for setting golden registers. 1087 */ 1088 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, 1089 const u32 *registers, 1090 const u32 array_size) 1091 { 1092 u32 tmp, reg, and_mask, or_mask; 1093 int i; 1094 1095 if (array_size % 3) 1096 return; 1097 1098 for (i = 0; i < array_size; i += 3) { 1099 reg = registers[i + 0]; 1100 and_mask = registers[i + 1]; 1101 or_mask = registers[i + 2]; 1102 1103 if (and_mask == 0xffffffff) { 1104 tmp = or_mask; 1105 } else { 1106 tmp = RREG32(reg); 1107 tmp &= ~and_mask; 1108 if (adev->family >= AMDGPU_FAMILY_AI) 1109 tmp |= (or_mask & and_mask); 1110 else 1111 tmp |= or_mask; 1112 } 1113 WREG32(reg, tmp); 1114 } 1115 } 1116 1117 /** 1118 * amdgpu_device_pci_config_reset - reset the GPU 1119 * 1120 * @adev: amdgpu_device pointer 1121 * 1122 * Resets the GPU using the pci config reset sequence. 1123 * Only applicable to asics prior to vega10. 1124 */ 1125 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) 1126 { 1127 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); 1128 } 1129 1130 /** 1131 * amdgpu_device_pci_reset - reset the GPU using generic PCI means 1132 * 1133 * @adev: amdgpu_device pointer 1134 * 1135 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.). 1136 */ 1137 int amdgpu_device_pci_reset(struct amdgpu_device *adev) 1138 { 1139 return pci_reset_function(adev->pdev); 1140 } 1141 1142 /* 1143 * GPU doorbell aperture helpers function. 1144 */ 1145 /** 1146 * amdgpu_device_doorbell_init - Init doorbell driver information. 1147 * 1148 * @adev: amdgpu_device pointer 1149 * 1150 * Init doorbell driver information (CIK) 1151 * Returns 0 on success, error on failure. 1152 */ 1153 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) 1154 { 1155 1156 /* No doorbell on SI hardware generation */ 1157 if (adev->asic_type < CHIP_BONAIRE) { 1158 adev->doorbell.base = 0; 1159 adev->doorbell.size = 0; 1160 adev->doorbell.num_kernel_doorbells = 0; 1161 adev->doorbell.ptr = NULL; 1162 return 0; 1163 } 1164 1165 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) 1166 return -EINVAL; 1167 1168 amdgpu_asic_init_doorbell_index(adev); 1169 1170 /* doorbell bar mapping */ 1171 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 1172 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 1173 1174 if (adev->enable_mes) { 1175 adev->doorbell.num_kernel_doorbells = 1176 adev->doorbell.size / sizeof(u32); 1177 } else { 1178 adev->doorbell.num_kernel_doorbells = 1179 min_t(u32, adev->doorbell.size / sizeof(u32), 1180 adev->doorbell_index.max_assignment+1); 1181 if (adev->doorbell.num_kernel_doorbells == 0) 1182 return -EINVAL; 1183 1184 /* For Vega, reserve and map two pages on doorbell BAR since SDMA 1185 * paging queue doorbell use the second page. The 1186 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the 1187 * doorbells are in the first page. So with paging queue enabled, 1188 * the max num_kernel_doorbells should + 1 page (0x400 in dword) 1189 */ 1190 if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) && 1191 adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) 1192 adev->doorbell.num_kernel_doorbells += 0x400; 1193 } 1194 1195 adev->doorbell.ptr = ioremap(adev->doorbell.base, 1196 adev->doorbell.num_kernel_doorbells * 1197 sizeof(u32)); 1198 if (adev->doorbell.ptr == NULL) 1199 return -ENOMEM; 1200 1201 return 0; 1202 } 1203 1204 /** 1205 * amdgpu_device_doorbell_fini - Tear down doorbell driver information. 1206 * 1207 * @adev: amdgpu_device pointer 1208 * 1209 * Tear down doorbell driver information (CIK) 1210 */ 1211 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) 1212 { 1213 iounmap(adev->doorbell.ptr); 1214 adev->doorbell.ptr = NULL; 1215 } 1216 1217 1218 1219 /* 1220 * amdgpu_device_wb_*() 1221 * Writeback is the method by which the GPU updates special pages in memory 1222 * with the status of certain GPU events (fences, ring pointers,etc.). 1223 */ 1224 1225 /** 1226 * amdgpu_device_wb_fini - Disable Writeback and free memory 1227 * 1228 * @adev: amdgpu_device pointer 1229 * 1230 * Disables Writeback and frees the Writeback memory (all asics). 1231 * Used at driver shutdown. 1232 */ 1233 static void amdgpu_device_wb_fini(struct amdgpu_device *adev) 1234 { 1235 if (adev->wb.wb_obj) { 1236 amdgpu_bo_free_kernel(&adev->wb.wb_obj, 1237 &adev->wb.gpu_addr, 1238 (void **)&adev->wb.wb); 1239 adev->wb.wb_obj = NULL; 1240 } 1241 } 1242 1243 /** 1244 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory 1245 * 1246 * @adev: amdgpu_device pointer 1247 * 1248 * Initializes writeback and allocates writeback memory (all asics). 1249 * Used at driver startup. 1250 * Returns 0 on success or an -error on failure. 1251 */ 1252 static int amdgpu_device_wb_init(struct amdgpu_device *adev) 1253 { 1254 int r; 1255 1256 if (adev->wb.wb_obj == NULL) { 1257 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ 1258 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, 1259 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1260 &adev->wb.wb_obj, &adev->wb.gpu_addr, 1261 (void **)&adev->wb.wb); 1262 if (r) { 1263 dev_warn(adev->dev, "(%d) create WB bo failed\n", r); 1264 return r; 1265 } 1266 1267 adev->wb.num_wb = AMDGPU_MAX_WB; 1268 memset(&adev->wb.used, 0, sizeof(adev->wb.used)); 1269 1270 /* clear wb memory */ 1271 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); 1272 } 1273 1274 return 0; 1275 } 1276 1277 /** 1278 * amdgpu_device_wb_get - Allocate a wb entry 1279 * 1280 * @adev: amdgpu_device pointer 1281 * @wb: wb index 1282 * 1283 * Allocate a wb slot for use by the driver (all asics). 1284 * Returns 0 on success or -EINVAL on failure. 1285 */ 1286 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) 1287 { 1288 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); 1289 1290 if (offset < adev->wb.num_wb) { 1291 __set_bit(offset, adev->wb.used); 1292 *wb = offset << 3; /* convert to dw offset */ 1293 return 0; 1294 } else { 1295 return -EINVAL; 1296 } 1297 } 1298 1299 /** 1300 * amdgpu_device_wb_free - Free a wb entry 1301 * 1302 * @adev: amdgpu_device pointer 1303 * @wb: wb index 1304 * 1305 * Free a wb slot allocated for use by the driver (all asics) 1306 */ 1307 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) 1308 { 1309 wb >>= 3; 1310 if (wb < adev->wb.num_wb) 1311 __clear_bit(wb, adev->wb.used); 1312 } 1313 1314 /** 1315 * amdgpu_device_resize_fb_bar - try to resize FB BAR 1316 * 1317 * @adev: amdgpu_device pointer 1318 * 1319 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not 1320 * to fail, but if any of the BARs is not accessible after the size we abort 1321 * driver loading by returning -ENODEV. 1322 */ 1323 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 1324 { 1325 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size); 1326 struct pci_bus *root; 1327 struct resource *res; 1328 unsigned i; 1329 u16 cmd; 1330 int r; 1331 1332 /* Bypass for VF */ 1333 if (amdgpu_sriov_vf(adev)) 1334 return 0; 1335 1336 /* skip if the bios has already enabled large BAR */ 1337 if (adev->gmc.real_vram_size && 1338 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) 1339 return 0; 1340 1341 /* Check if the root BUS has 64bit memory resources */ 1342 root = adev->pdev->bus; 1343 while (root->parent) 1344 root = root->parent; 1345 1346 pci_bus_for_each_resource(root, res, i) { 1347 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 1348 res->start > 0x100000000ull) 1349 break; 1350 } 1351 1352 /* Trying to resize is pointless without a root hub window above 4GB */ 1353 if (!res) 1354 return 0; 1355 1356 /* Limit the BAR size to what is available */ 1357 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1, 1358 rbar_size); 1359 1360 /* Disable memory decoding while we change the BAR addresses and size */ 1361 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); 1362 pci_write_config_word(adev->pdev, PCI_COMMAND, 1363 cmd & ~PCI_COMMAND_MEMORY); 1364 1365 /* Free the VRAM and doorbell BAR, we most likely need to move both. */ 1366 amdgpu_device_doorbell_fini(adev); 1367 if (adev->asic_type >= CHIP_BONAIRE) 1368 pci_release_resource(adev->pdev, 2); 1369 1370 pci_release_resource(adev->pdev, 0); 1371 1372 r = pci_resize_resource(adev->pdev, 0, rbar_size); 1373 if (r == -ENOSPC) 1374 DRM_INFO("Not enough PCI address space for a large BAR."); 1375 else if (r && r != -ENOTSUPP) 1376 DRM_ERROR("Problem resizing BAR0 (%d).", r); 1377 1378 pci_assign_unassigned_bus_resources(adev->pdev->bus); 1379 1380 /* When the doorbell or fb BAR isn't available we have no chance of 1381 * using the device. 1382 */ 1383 r = amdgpu_device_doorbell_init(adev); 1384 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) 1385 return -ENODEV; 1386 1387 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); 1388 1389 return 0; 1390 } 1391 1392 /* 1393 * GPU helpers function. 1394 */ 1395 /** 1396 * amdgpu_device_need_post - check if the hw need post or not 1397 * 1398 * @adev: amdgpu_device pointer 1399 * 1400 * Check if the asic has been initialized (all asics) at driver startup 1401 * or post is needed if hw reset is performed. 1402 * Returns true if need or false if not. 1403 */ 1404 bool amdgpu_device_need_post(struct amdgpu_device *adev) 1405 { 1406 uint32_t reg; 1407 1408 if (amdgpu_sriov_vf(adev)) 1409 return false; 1410 1411 if (amdgpu_passthrough(adev)) { 1412 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot 1413 * some old smc fw still need driver do vPost otherwise gpu hang, while 1414 * those smc fw version above 22.15 doesn't have this flaw, so we force 1415 * vpost executed for smc version below 22.15 1416 */ 1417 if (adev->asic_type == CHIP_FIJI) { 1418 int err; 1419 uint32_t fw_ver; 1420 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); 1421 /* force vPost if error occured */ 1422 if (err) 1423 return true; 1424 1425 fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 1426 if (fw_ver < 0x00160e00) 1427 return true; 1428 } 1429 } 1430 1431 /* Don't post if we need to reset whole hive on init */ 1432 if (adev->gmc.xgmi.pending_reset) 1433 return false; 1434 1435 if (adev->has_hw_reset) { 1436 adev->has_hw_reset = false; 1437 return true; 1438 } 1439 1440 /* bios scratch used on CIK+ */ 1441 if (adev->asic_type >= CHIP_BONAIRE) 1442 return amdgpu_atombios_scratch_need_asic_init(adev); 1443 1444 /* check MEM_SIZE for older asics */ 1445 reg = amdgpu_asic_get_config_memsize(adev); 1446 1447 if ((reg != 0) && (reg != 0xffffffff)) 1448 return false; 1449 1450 return true; 1451 } 1452 1453 /** 1454 * amdgpu_device_should_use_aspm - check if the device should program ASPM 1455 * 1456 * @adev: amdgpu_device pointer 1457 * 1458 * Confirm whether the module parameter and pcie bridge agree that ASPM should 1459 * be set for this device. 1460 * 1461 * Returns true if it should be used or false if not. 1462 */ 1463 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) 1464 { 1465 switch (amdgpu_aspm) { 1466 case -1: 1467 break; 1468 case 0: 1469 return false; 1470 case 1: 1471 return true; 1472 default: 1473 return false; 1474 } 1475 return pcie_aspm_enabled(adev->pdev); 1476 } 1477 1478 bool amdgpu_device_aspm_support_quirk(void) 1479 { 1480 #if IS_ENABLED(CONFIG_X86) 1481 struct cpuinfo_x86 *c = &cpu_data(0); 1482 1483 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); 1484 #else 1485 return true; 1486 #endif 1487 } 1488 1489 /* if we get transitioned to only one device, take VGA back */ 1490 /** 1491 * amdgpu_device_vga_set_decode - enable/disable vga decode 1492 * 1493 * @pdev: PCI device pointer 1494 * @state: enable/disable vga decode 1495 * 1496 * Enable/disable vga decode (all asics). 1497 * Returns VGA resource flags. 1498 */ 1499 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, 1500 bool state) 1501 { 1502 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); 1503 amdgpu_asic_set_vga_state(adev, state); 1504 if (state) 1505 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | 1506 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1507 else 1508 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1509 } 1510 1511 /** 1512 * amdgpu_device_check_block_size - validate the vm block size 1513 * 1514 * @adev: amdgpu_device pointer 1515 * 1516 * Validates the vm block size specified via module parameter. 1517 * The vm block size defines number of bits in page table versus page directory, 1518 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1519 * page table and the remaining bits are in the page directory. 1520 */ 1521 static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 1522 { 1523 /* defines number of bits in page table versus page directory, 1524 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1525 * page table and the remaining bits are in the page directory */ 1526 if (amdgpu_vm_block_size == -1) 1527 return; 1528 1529 if (amdgpu_vm_block_size < 9) { 1530 dev_warn(adev->dev, "VM page table size (%d) too small\n", 1531 amdgpu_vm_block_size); 1532 amdgpu_vm_block_size = -1; 1533 } 1534 } 1535 1536 /** 1537 * amdgpu_device_check_vm_size - validate the vm size 1538 * 1539 * @adev: amdgpu_device pointer 1540 * 1541 * Validates the vm size in GB specified via module parameter. 1542 * The VM size is the size of the GPU virtual memory space in GB. 1543 */ 1544 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 1545 { 1546 /* no need to check the default value */ 1547 if (amdgpu_vm_size == -1) 1548 return; 1549 1550 if (amdgpu_vm_size < 1) { 1551 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", 1552 amdgpu_vm_size); 1553 amdgpu_vm_size = -1; 1554 } 1555 } 1556 1557 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) 1558 { 1559 struct sysinfo si; 1560 bool is_os_64 = (sizeof(void *) == 8); 1561 uint64_t total_memory; 1562 uint64_t dram_size_seven_GB = 0x1B8000000; 1563 uint64_t dram_size_three_GB = 0xB8000000; 1564 1565 if (amdgpu_smu_memory_pool_size == 0) 1566 return; 1567 1568 if (!is_os_64) { 1569 DRM_WARN("Not 64-bit OS, feature not supported\n"); 1570 goto def_value; 1571 } 1572 si_meminfo(&si); 1573 total_memory = (uint64_t)si.totalram * si.mem_unit; 1574 1575 if ((amdgpu_smu_memory_pool_size == 1) || 1576 (amdgpu_smu_memory_pool_size == 2)) { 1577 if (total_memory < dram_size_three_GB) 1578 goto def_value1; 1579 } else if ((amdgpu_smu_memory_pool_size == 4) || 1580 (amdgpu_smu_memory_pool_size == 8)) { 1581 if (total_memory < dram_size_seven_GB) 1582 goto def_value1; 1583 } else { 1584 DRM_WARN("Smu memory pool size not supported\n"); 1585 goto def_value; 1586 } 1587 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; 1588 1589 return; 1590 1591 def_value1: 1592 DRM_WARN("No enough system memory\n"); 1593 def_value: 1594 adev->pm.smu_prv_buffer_size = 0; 1595 } 1596 1597 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) 1598 { 1599 if (!(adev->flags & AMD_IS_APU) || 1600 adev->asic_type < CHIP_RAVEN) 1601 return 0; 1602 1603 switch (adev->asic_type) { 1604 case CHIP_RAVEN: 1605 if (adev->pdev->device == 0x15dd) 1606 adev->apu_flags |= AMD_APU_IS_RAVEN; 1607 if (adev->pdev->device == 0x15d8) 1608 adev->apu_flags |= AMD_APU_IS_PICASSO; 1609 break; 1610 case CHIP_RENOIR: 1611 if ((adev->pdev->device == 0x1636) || 1612 (adev->pdev->device == 0x164c)) 1613 adev->apu_flags |= AMD_APU_IS_RENOIR; 1614 else 1615 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; 1616 break; 1617 case CHIP_VANGOGH: 1618 adev->apu_flags |= AMD_APU_IS_VANGOGH; 1619 break; 1620 case CHIP_YELLOW_CARP: 1621 break; 1622 case CHIP_CYAN_SKILLFISH: 1623 if ((adev->pdev->device == 0x13FE) || 1624 (adev->pdev->device == 0x143F)) 1625 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; 1626 break; 1627 default: 1628 break; 1629 } 1630 1631 return 0; 1632 } 1633 1634 /** 1635 * amdgpu_device_check_arguments - validate module params 1636 * 1637 * @adev: amdgpu_device pointer 1638 * 1639 * Validates certain module parameters and updates 1640 * the associated values used by the driver (all asics). 1641 */ 1642 static int amdgpu_device_check_arguments(struct amdgpu_device *adev) 1643 { 1644 if (amdgpu_sched_jobs < 4) { 1645 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", 1646 amdgpu_sched_jobs); 1647 amdgpu_sched_jobs = 4; 1648 } else if (!is_power_of_2(amdgpu_sched_jobs)) { 1649 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", 1650 amdgpu_sched_jobs); 1651 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 1652 } 1653 1654 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { 1655 /* gart size must be greater or equal to 32M */ 1656 dev_warn(adev->dev, "gart size (%d) too small\n", 1657 amdgpu_gart_size); 1658 amdgpu_gart_size = -1; 1659 } 1660 1661 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { 1662 /* gtt size must be greater or equal to 32M */ 1663 dev_warn(adev->dev, "gtt size (%d) too small\n", 1664 amdgpu_gtt_size); 1665 amdgpu_gtt_size = -1; 1666 } 1667 1668 /* valid range is between 4 and 9 inclusive */ 1669 if (amdgpu_vm_fragment_size != -1 && 1670 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { 1671 dev_warn(adev->dev, "valid range is between 4 and 9\n"); 1672 amdgpu_vm_fragment_size = -1; 1673 } 1674 1675 if (amdgpu_sched_hw_submission < 2) { 1676 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n", 1677 amdgpu_sched_hw_submission); 1678 amdgpu_sched_hw_submission = 2; 1679 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) { 1680 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n", 1681 amdgpu_sched_hw_submission); 1682 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); 1683 } 1684 1685 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) { 1686 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n"); 1687 amdgpu_reset_method = -1; 1688 } 1689 1690 amdgpu_device_check_smu_prv_buffer_size(adev); 1691 1692 amdgpu_device_check_vm_size(adev); 1693 1694 amdgpu_device_check_block_size(adev); 1695 1696 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 1697 1698 return 0; 1699 } 1700 1701 /** 1702 * amdgpu_switcheroo_set_state - set switcheroo state 1703 * 1704 * @pdev: pci dev pointer 1705 * @state: vga_switcheroo state 1706 * 1707 * Callback for the switcheroo driver. Suspends or resumes 1708 * the asics before or after it is powered up using ACPI methods. 1709 */ 1710 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, 1711 enum vga_switcheroo_state state) 1712 { 1713 struct drm_device *dev = pci_get_drvdata(pdev); 1714 int r; 1715 1716 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) 1717 return; 1718 1719 if (state == VGA_SWITCHEROO_ON) { 1720 pr_info("switched on\n"); 1721 /* don't suspend or resume card normally */ 1722 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1723 1724 pci_set_power_state(pdev, PCI_D0); 1725 amdgpu_device_load_pci_state(pdev); 1726 r = pci_enable_device(pdev); 1727 if (r) 1728 DRM_WARN("pci_enable_device failed (%d)\n", r); 1729 amdgpu_device_resume(dev, true); 1730 1731 dev->switch_power_state = DRM_SWITCH_POWER_ON; 1732 } else { 1733 pr_info("switched off\n"); 1734 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1735 amdgpu_device_suspend(dev, true); 1736 amdgpu_device_cache_pci_state(pdev); 1737 /* Shut down the device */ 1738 pci_disable_device(pdev); 1739 pci_set_power_state(pdev, PCI_D3cold); 1740 dev->switch_power_state = DRM_SWITCH_POWER_OFF; 1741 } 1742 } 1743 1744 /** 1745 * amdgpu_switcheroo_can_switch - see if switcheroo state can change 1746 * 1747 * @pdev: pci dev pointer 1748 * 1749 * Callback for the switcheroo driver. Check of the switcheroo 1750 * state can be changed. 1751 * Returns true if the state can be changed, false if not. 1752 */ 1753 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) 1754 { 1755 struct drm_device *dev = pci_get_drvdata(pdev); 1756 1757 /* 1758 * FIXME: open_count is protected by drm_global_mutex but that would lead to 1759 * locking inversion with the driver load path. And the access here is 1760 * completely racy anyway. So don't bother with locking for now. 1761 */ 1762 return atomic_read(&dev->open_count) == 0; 1763 } 1764 1765 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { 1766 .set_gpu_state = amdgpu_switcheroo_set_state, 1767 .reprobe = NULL, 1768 .can_switch = amdgpu_switcheroo_can_switch, 1769 }; 1770 1771 /** 1772 * amdgpu_device_ip_set_clockgating_state - set the CG state 1773 * 1774 * @dev: amdgpu_device pointer 1775 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1776 * @state: clockgating state (gate or ungate) 1777 * 1778 * Sets the requested clockgating state for all instances of 1779 * the hardware IP specified. 1780 * Returns the error code from the last instance. 1781 */ 1782 int amdgpu_device_ip_set_clockgating_state(void *dev, 1783 enum amd_ip_block_type block_type, 1784 enum amd_clockgating_state state) 1785 { 1786 struct amdgpu_device *adev = dev; 1787 int i, r = 0; 1788 1789 for (i = 0; i < adev->num_ip_blocks; i++) { 1790 if (!adev->ip_blocks[i].status.valid) 1791 continue; 1792 if (adev->ip_blocks[i].version->type != block_type) 1793 continue; 1794 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) 1795 continue; 1796 r = adev->ip_blocks[i].version->funcs->set_clockgating_state( 1797 (void *)adev, state); 1798 if (r) 1799 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", 1800 adev->ip_blocks[i].version->funcs->name, r); 1801 } 1802 return r; 1803 } 1804 1805 /** 1806 * amdgpu_device_ip_set_powergating_state - set the PG state 1807 * 1808 * @dev: amdgpu_device pointer 1809 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1810 * @state: powergating state (gate or ungate) 1811 * 1812 * Sets the requested powergating state for all instances of 1813 * the hardware IP specified. 1814 * Returns the error code from the last instance. 1815 */ 1816 int amdgpu_device_ip_set_powergating_state(void *dev, 1817 enum amd_ip_block_type block_type, 1818 enum amd_powergating_state state) 1819 { 1820 struct amdgpu_device *adev = dev; 1821 int i, r = 0; 1822 1823 for (i = 0; i < adev->num_ip_blocks; i++) { 1824 if (!adev->ip_blocks[i].status.valid) 1825 continue; 1826 if (adev->ip_blocks[i].version->type != block_type) 1827 continue; 1828 if (!adev->ip_blocks[i].version->funcs->set_powergating_state) 1829 continue; 1830 r = adev->ip_blocks[i].version->funcs->set_powergating_state( 1831 (void *)adev, state); 1832 if (r) 1833 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", 1834 adev->ip_blocks[i].version->funcs->name, r); 1835 } 1836 return r; 1837 } 1838 1839 /** 1840 * amdgpu_device_ip_get_clockgating_state - get the CG state 1841 * 1842 * @adev: amdgpu_device pointer 1843 * @flags: clockgating feature flags 1844 * 1845 * Walks the list of IPs on the device and updates the clockgating 1846 * flags for each IP. 1847 * Updates @flags with the feature flags for each hardware IP where 1848 * clockgating is enabled. 1849 */ 1850 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, 1851 u64 *flags) 1852 { 1853 int i; 1854 1855 for (i = 0; i < adev->num_ip_blocks; i++) { 1856 if (!adev->ip_blocks[i].status.valid) 1857 continue; 1858 if (adev->ip_blocks[i].version->funcs->get_clockgating_state) 1859 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags); 1860 } 1861 } 1862 1863 /** 1864 * amdgpu_device_ip_wait_for_idle - wait for idle 1865 * 1866 * @adev: amdgpu_device pointer 1867 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1868 * 1869 * Waits for the request hardware IP to be idle. 1870 * Returns 0 for success or a negative error code on failure. 1871 */ 1872 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, 1873 enum amd_ip_block_type block_type) 1874 { 1875 int i, r; 1876 1877 for (i = 0; i < adev->num_ip_blocks; i++) { 1878 if (!adev->ip_blocks[i].status.valid) 1879 continue; 1880 if (adev->ip_blocks[i].version->type == block_type) { 1881 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev); 1882 if (r) 1883 return r; 1884 break; 1885 } 1886 } 1887 return 0; 1888 1889 } 1890 1891 /** 1892 * amdgpu_device_ip_is_idle - is the hardware IP idle 1893 * 1894 * @adev: amdgpu_device pointer 1895 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1896 * 1897 * Check if the hardware IP is idle or not. 1898 * Returns true if it the IP is idle, false if not. 1899 */ 1900 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, 1901 enum amd_ip_block_type block_type) 1902 { 1903 int i; 1904 1905 for (i = 0; i < adev->num_ip_blocks; i++) { 1906 if (!adev->ip_blocks[i].status.valid) 1907 continue; 1908 if (adev->ip_blocks[i].version->type == block_type) 1909 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev); 1910 } 1911 return true; 1912 1913 } 1914 1915 /** 1916 * amdgpu_device_ip_get_ip_block - get a hw IP pointer 1917 * 1918 * @adev: amdgpu_device pointer 1919 * @type: Type of hardware IP (SMU, GFX, UVD, etc.) 1920 * 1921 * Returns a pointer to the hardware IP block structure 1922 * if it exists for the asic, otherwise NULL. 1923 */ 1924 struct amdgpu_ip_block * 1925 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, 1926 enum amd_ip_block_type type) 1927 { 1928 int i; 1929 1930 for (i = 0; i < adev->num_ip_blocks; i++) 1931 if (adev->ip_blocks[i].version->type == type) 1932 return &adev->ip_blocks[i]; 1933 1934 return NULL; 1935 } 1936 1937 /** 1938 * amdgpu_device_ip_block_version_cmp 1939 * 1940 * @adev: amdgpu_device pointer 1941 * @type: enum amd_ip_block_type 1942 * @major: major version 1943 * @minor: minor version 1944 * 1945 * return 0 if equal or greater 1946 * return 1 if smaller or the ip_block doesn't exist 1947 */ 1948 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, 1949 enum amd_ip_block_type type, 1950 u32 major, u32 minor) 1951 { 1952 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); 1953 1954 if (ip_block && ((ip_block->version->major > major) || 1955 ((ip_block->version->major == major) && 1956 (ip_block->version->minor >= minor)))) 1957 return 0; 1958 1959 return 1; 1960 } 1961 1962 /** 1963 * amdgpu_device_ip_block_add 1964 * 1965 * @adev: amdgpu_device pointer 1966 * @ip_block_version: pointer to the IP to add 1967 * 1968 * Adds the IP block driver information to the collection of IPs 1969 * on the asic. 1970 */ 1971 int amdgpu_device_ip_block_add(struct amdgpu_device *adev, 1972 const struct amdgpu_ip_block_version *ip_block_version) 1973 { 1974 if (!ip_block_version) 1975 return -EINVAL; 1976 1977 switch (ip_block_version->type) { 1978 case AMD_IP_BLOCK_TYPE_VCN: 1979 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) 1980 return 0; 1981 break; 1982 case AMD_IP_BLOCK_TYPE_JPEG: 1983 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) 1984 return 0; 1985 break; 1986 default: 1987 break; 1988 } 1989 1990 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, 1991 ip_block_version->funcs->name); 1992 1993 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; 1994 1995 return 0; 1996 } 1997 1998 /** 1999 * amdgpu_device_enable_virtual_display - enable virtual display feature 2000 * 2001 * @adev: amdgpu_device pointer 2002 * 2003 * Enabled the virtual display feature if the user has enabled it via 2004 * the module parameter virtual_display. This feature provides a virtual 2005 * display hardware on headless boards or in virtualized environments. 2006 * This function parses and validates the configuration string specified by 2007 * the user and configues the virtual display configuration (number of 2008 * virtual connectors, crtcs, etc.) specified. 2009 */ 2010 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) 2011 { 2012 adev->enable_virtual_display = false; 2013 2014 if (amdgpu_virtual_display) { 2015 const char *pci_address_name = pci_name(adev->pdev); 2016 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname; 2017 2018 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL); 2019 pciaddstr_tmp = pciaddstr; 2020 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) { 2021 pciaddname = strsep(&pciaddname_tmp, ","); 2022 if (!strcmp("all", pciaddname) 2023 || !strcmp(pci_address_name, pciaddname)) { 2024 long num_crtc; 2025 int res = -1; 2026 2027 adev->enable_virtual_display = true; 2028 2029 if (pciaddname_tmp) 2030 res = kstrtol(pciaddname_tmp, 10, 2031 &num_crtc); 2032 2033 if (!res) { 2034 if (num_crtc < 1) 2035 num_crtc = 1; 2036 if (num_crtc > 6) 2037 num_crtc = 6; 2038 adev->mode_info.num_crtc = num_crtc; 2039 } else { 2040 adev->mode_info.num_crtc = 1; 2041 } 2042 break; 2043 } 2044 } 2045 2046 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", 2047 amdgpu_virtual_display, pci_address_name, 2048 adev->enable_virtual_display, adev->mode_info.num_crtc); 2049 2050 kfree(pciaddstr); 2051 } 2052 } 2053 2054 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) 2055 { 2056 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { 2057 adev->mode_info.num_crtc = 1; 2058 adev->enable_virtual_display = true; 2059 DRM_INFO("virtual_display:%d, num_crtc:%d\n", 2060 adev->enable_virtual_display, adev->mode_info.num_crtc); 2061 } 2062 } 2063 2064 /** 2065 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware 2066 * 2067 * @adev: amdgpu_device pointer 2068 * 2069 * Parses the asic configuration parameters specified in the gpu info 2070 * firmware and makes them availale to the driver for use in configuring 2071 * the asic. 2072 * Returns 0 on success, -EINVAL on failure. 2073 */ 2074 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 2075 { 2076 const char *chip_name; 2077 char fw_name[40]; 2078 int err; 2079 const struct gpu_info_firmware_header_v1_0 *hdr; 2080 2081 adev->firmware.gpu_info_fw = NULL; 2082 2083 if (adev->mman.discovery_bin) { 2084 /* 2085 * FIXME: The bounding box is still needed by Navi12, so 2086 * temporarily read it from gpu_info firmware. Should be dropped 2087 * when DAL no longer needs it. 2088 */ 2089 if (adev->asic_type != CHIP_NAVI12) 2090 return 0; 2091 } 2092 2093 switch (adev->asic_type) { 2094 default: 2095 return 0; 2096 case CHIP_VEGA10: 2097 chip_name = "vega10"; 2098 break; 2099 case CHIP_VEGA12: 2100 chip_name = "vega12"; 2101 break; 2102 case CHIP_RAVEN: 2103 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2104 chip_name = "raven2"; 2105 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 2106 chip_name = "picasso"; 2107 else 2108 chip_name = "raven"; 2109 break; 2110 case CHIP_ARCTURUS: 2111 chip_name = "arcturus"; 2112 break; 2113 case CHIP_NAVI12: 2114 chip_name = "navi12"; 2115 break; 2116 } 2117 2118 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); 2119 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name); 2120 if (err) { 2121 dev_err(adev->dev, 2122 "Failed to get gpu_info firmware \"%s\"\n", 2123 fw_name); 2124 goto out; 2125 } 2126 2127 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 2128 amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 2129 2130 switch (hdr->version_major) { 2131 case 1: 2132 { 2133 const struct gpu_info_firmware_v1_0 *gpu_info_fw = 2134 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 2135 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2136 2137 /* 2138 * Should be droped when DAL no longer needs it. 2139 */ 2140 if (adev->asic_type == CHIP_NAVI12) 2141 goto parse_soc_bounding_box; 2142 2143 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 2144 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 2145 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 2146 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 2147 adev->gfx.config.max_texture_channel_caches = 2148 le32_to_cpu(gpu_info_fw->gc_num_tccs); 2149 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 2150 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 2151 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 2152 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 2153 adev->gfx.config.double_offchip_lds_buf = 2154 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 2155 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 2156 adev->gfx.cu_info.max_waves_per_simd = 2157 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 2158 adev->gfx.cu_info.max_scratch_slots_per_cu = 2159 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 2160 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 2161 if (hdr->version_minor >= 1) { 2162 const struct gpu_info_firmware_v1_1 *gpu_info_fw = 2163 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 2164 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2165 adev->gfx.config.num_sc_per_sh = 2166 le32_to_cpu(gpu_info_fw->num_sc_per_sh); 2167 adev->gfx.config.num_packer_per_sc = 2168 le32_to_cpu(gpu_info_fw->num_packer_per_sc); 2169 } 2170 2171 parse_soc_bounding_box: 2172 /* 2173 * soc bounding box info is not integrated in disocovery table, 2174 * we always need to parse it from gpu info firmware if needed. 2175 */ 2176 if (hdr->version_minor == 2) { 2177 const struct gpu_info_firmware_v1_2 *gpu_info_fw = 2178 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 2179 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2180 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 2181 } 2182 break; 2183 } 2184 default: 2185 dev_err(adev->dev, 2186 "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 2187 err = -EINVAL; 2188 goto out; 2189 } 2190 out: 2191 return err; 2192 } 2193 2194 /** 2195 * amdgpu_device_ip_early_init - run early init for hardware IPs 2196 * 2197 * @adev: amdgpu_device pointer 2198 * 2199 * Early initialization pass for hardware IPs. The hardware IPs that make 2200 * up each asic are discovered each IP's early_init callback is run. This 2201 * is the first stage in initializing the asic. 2202 * Returns 0 on success, negative error code on failure. 2203 */ 2204 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 2205 { 2206 struct drm_device *dev = adev_to_drm(adev); 2207 struct pci_dev *parent; 2208 int i, r; 2209 bool total; 2210 2211 amdgpu_device_enable_virtual_display(adev); 2212 2213 if (amdgpu_sriov_vf(adev)) { 2214 r = amdgpu_virt_request_full_gpu(adev, true); 2215 if (r) 2216 return r; 2217 } 2218 2219 switch (adev->asic_type) { 2220 #ifdef CONFIG_DRM_AMDGPU_SI 2221 case CHIP_VERDE: 2222 case CHIP_TAHITI: 2223 case CHIP_PITCAIRN: 2224 case CHIP_OLAND: 2225 case CHIP_HAINAN: 2226 adev->family = AMDGPU_FAMILY_SI; 2227 r = si_set_ip_blocks(adev); 2228 if (r) 2229 return r; 2230 break; 2231 #endif 2232 #ifdef CONFIG_DRM_AMDGPU_CIK 2233 case CHIP_BONAIRE: 2234 case CHIP_HAWAII: 2235 case CHIP_KAVERI: 2236 case CHIP_KABINI: 2237 case CHIP_MULLINS: 2238 if (adev->flags & AMD_IS_APU) 2239 adev->family = AMDGPU_FAMILY_KV; 2240 else 2241 adev->family = AMDGPU_FAMILY_CI; 2242 2243 r = cik_set_ip_blocks(adev); 2244 if (r) 2245 return r; 2246 break; 2247 #endif 2248 case CHIP_TOPAZ: 2249 case CHIP_TONGA: 2250 case CHIP_FIJI: 2251 case CHIP_POLARIS10: 2252 case CHIP_POLARIS11: 2253 case CHIP_POLARIS12: 2254 case CHIP_VEGAM: 2255 case CHIP_CARRIZO: 2256 case CHIP_STONEY: 2257 if (adev->flags & AMD_IS_APU) 2258 adev->family = AMDGPU_FAMILY_CZ; 2259 else 2260 adev->family = AMDGPU_FAMILY_VI; 2261 2262 r = vi_set_ip_blocks(adev); 2263 if (r) 2264 return r; 2265 break; 2266 default: 2267 r = amdgpu_discovery_set_ip_blocks(adev); 2268 if (r) 2269 return r; 2270 break; 2271 } 2272 2273 if (amdgpu_has_atpx() && 2274 (amdgpu_is_atpx_hybrid() || 2275 amdgpu_has_atpx_dgpu_power_cntl()) && 2276 ((adev->flags & AMD_IS_APU) == 0) && 2277 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) 2278 adev->flags |= AMD_IS_PX; 2279 2280 if (!(adev->flags & AMD_IS_APU)) { 2281 parent = pci_upstream_bridge(adev->pdev); 2282 adev->has_pr3 = parent ? pci_pr3_present(parent) : false; 2283 } 2284 2285 2286 adev->pm.pp_feature = amdgpu_pp_feature_mask; 2287 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) 2288 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 2289 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) 2290 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; 2291 2292 total = true; 2293 for (i = 0; i < adev->num_ip_blocks; i++) { 2294 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 2295 DRM_ERROR("disabled ip block: %d <%s>\n", 2296 i, adev->ip_blocks[i].version->funcs->name); 2297 adev->ip_blocks[i].status.valid = false; 2298 } else { 2299 if (adev->ip_blocks[i].version->funcs->early_init) { 2300 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 2301 if (r == -ENOENT) { 2302 adev->ip_blocks[i].status.valid = false; 2303 } else if (r) { 2304 DRM_ERROR("early_init of IP block <%s> failed %d\n", 2305 adev->ip_blocks[i].version->funcs->name, r); 2306 total = false; 2307 } else { 2308 adev->ip_blocks[i].status.valid = true; 2309 } 2310 } else { 2311 adev->ip_blocks[i].status.valid = true; 2312 } 2313 } 2314 /* get the vbios after the asic_funcs are set up */ 2315 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2316 r = amdgpu_device_parse_gpu_info_fw(adev); 2317 if (r) 2318 return r; 2319 2320 /* Read BIOS */ 2321 if (!amdgpu_get_bios(adev)) 2322 return -EINVAL; 2323 2324 r = amdgpu_atombios_init(adev); 2325 if (r) { 2326 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2327 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2328 return r; 2329 } 2330 2331 /*get pf2vf msg info at it's earliest time*/ 2332 if (amdgpu_sriov_vf(adev)) 2333 amdgpu_virt_init_data_exchange(adev); 2334 2335 } 2336 } 2337 if (!total) 2338 return -ENODEV; 2339 2340 amdgpu_amdkfd_device_probe(adev); 2341 adev->cg_flags &= amdgpu_cg_mask; 2342 adev->pg_flags &= amdgpu_pg_mask; 2343 2344 return 0; 2345 } 2346 2347 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 2348 { 2349 int i, r; 2350 2351 for (i = 0; i < adev->num_ip_blocks; i++) { 2352 if (!adev->ip_blocks[i].status.sw) 2353 continue; 2354 if (adev->ip_blocks[i].status.hw) 2355 continue; 2356 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2357 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || 2358 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2359 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2360 if (r) { 2361 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2362 adev->ip_blocks[i].version->funcs->name, r); 2363 return r; 2364 } 2365 adev->ip_blocks[i].status.hw = true; 2366 } 2367 } 2368 2369 return 0; 2370 } 2371 2372 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 2373 { 2374 int i, r; 2375 2376 for (i = 0; i < adev->num_ip_blocks; i++) { 2377 if (!adev->ip_blocks[i].status.sw) 2378 continue; 2379 if (adev->ip_blocks[i].status.hw) 2380 continue; 2381 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2382 if (r) { 2383 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2384 adev->ip_blocks[i].version->funcs->name, r); 2385 return r; 2386 } 2387 adev->ip_blocks[i].status.hw = true; 2388 } 2389 2390 return 0; 2391 } 2392 2393 static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 2394 { 2395 int r = 0; 2396 int i; 2397 uint32_t smu_version; 2398 2399 if (adev->asic_type >= CHIP_VEGA10) { 2400 for (i = 0; i < adev->num_ip_blocks; i++) { 2401 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) 2402 continue; 2403 2404 if (!adev->ip_blocks[i].status.sw) 2405 continue; 2406 2407 /* no need to do the fw loading again if already done*/ 2408 if (adev->ip_blocks[i].status.hw == true) 2409 break; 2410 2411 if (amdgpu_in_reset(adev) || adev->in_suspend) { 2412 r = adev->ip_blocks[i].version->funcs->resume(adev); 2413 if (r) { 2414 DRM_ERROR("resume of IP block <%s> failed %d\n", 2415 adev->ip_blocks[i].version->funcs->name, r); 2416 return r; 2417 } 2418 } else { 2419 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2420 if (r) { 2421 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2422 adev->ip_blocks[i].version->funcs->name, r); 2423 return r; 2424 } 2425 } 2426 2427 adev->ip_blocks[i].status.hw = true; 2428 break; 2429 } 2430 } 2431 2432 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) 2433 r = amdgpu_pm_load_smu_firmware(adev, &smu_version); 2434 2435 return r; 2436 } 2437 2438 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) 2439 { 2440 long timeout; 2441 int r, i; 2442 2443 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2444 struct amdgpu_ring *ring = adev->rings[i]; 2445 2446 /* No need to setup the GPU scheduler for rings that don't need it */ 2447 if (!ring || ring->no_scheduler) 2448 continue; 2449 2450 switch (ring->funcs->type) { 2451 case AMDGPU_RING_TYPE_GFX: 2452 timeout = adev->gfx_timeout; 2453 break; 2454 case AMDGPU_RING_TYPE_COMPUTE: 2455 timeout = adev->compute_timeout; 2456 break; 2457 case AMDGPU_RING_TYPE_SDMA: 2458 timeout = adev->sdma_timeout; 2459 break; 2460 default: 2461 timeout = adev->video_timeout; 2462 break; 2463 } 2464 2465 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, 2466 ring->num_hw_submission, 0, 2467 timeout, adev->reset_domain->wq, 2468 ring->sched_score, ring->name, 2469 adev->dev); 2470 if (r) { 2471 DRM_ERROR("Failed to create scheduler on ring %s.\n", 2472 ring->name); 2473 return r; 2474 } 2475 } 2476 2477 return 0; 2478 } 2479 2480 2481 /** 2482 * amdgpu_device_ip_init - run init for hardware IPs 2483 * 2484 * @adev: amdgpu_device pointer 2485 * 2486 * Main initialization pass for hardware IPs. The list of all the hardware 2487 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 2488 * are run. sw_init initializes the software state associated with each IP 2489 * and hw_init initializes the hardware associated with each IP. 2490 * Returns 0 on success, negative error code on failure. 2491 */ 2492 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 2493 { 2494 int i, r; 2495 2496 r = amdgpu_ras_init(adev); 2497 if (r) 2498 return r; 2499 2500 for (i = 0; i < adev->num_ip_blocks; i++) { 2501 if (!adev->ip_blocks[i].status.valid) 2502 continue; 2503 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 2504 if (r) { 2505 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 2506 adev->ip_blocks[i].version->funcs->name, r); 2507 goto init_failed; 2508 } 2509 adev->ip_blocks[i].status.sw = true; 2510 2511 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2512 /* need to do common hw init early so everything is set up for gmc */ 2513 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2514 if (r) { 2515 DRM_ERROR("hw_init %d failed %d\n", i, r); 2516 goto init_failed; 2517 } 2518 adev->ip_blocks[i].status.hw = true; 2519 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2520 /* need to do gmc hw init early so we can allocate gpu mem */ 2521 /* Try to reserve bad pages early */ 2522 if (amdgpu_sriov_vf(adev)) 2523 amdgpu_virt_exchange_data(adev); 2524 2525 r = amdgpu_device_mem_scratch_init(adev); 2526 if (r) { 2527 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); 2528 goto init_failed; 2529 } 2530 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2531 if (r) { 2532 DRM_ERROR("hw_init %d failed %d\n", i, r); 2533 goto init_failed; 2534 } 2535 r = amdgpu_device_wb_init(adev); 2536 if (r) { 2537 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 2538 goto init_failed; 2539 } 2540 adev->ip_blocks[i].status.hw = true; 2541 2542 /* right after GMC hw init, we create CSA */ 2543 if (amdgpu_mcbp) { 2544 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2545 AMDGPU_GEM_DOMAIN_VRAM | 2546 AMDGPU_GEM_DOMAIN_GTT, 2547 AMDGPU_CSA_SIZE); 2548 if (r) { 2549 DRM_ERROR("allocate CSA failed %d\n", r); 2550 goto init_failed; 2551 } 2552 } 2553 } 2554 } 2555 2556 if (amdgpu_sriov_vf(adev)) 2557 amdgpu_virt_init_data_exchange(adev); 2558 2559 r = amdgpu_ib_pool_init(adev); 2560 if (r) { 2561 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2562 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2563 goto init_failed; 2564 } 2565 2566 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2567 if (r) 2568 goto init_failed; 2569 2570 r = amdgpu_device_ip_hw_init_phase1(adev); 2571 if (r) 2572 goto init_failed; 2573 2574 r = amdgpu_device_fw_loading(adev); 2575 if (r) 2576 goto init_failed; 2577 2578 r = amdgpu_device_ip_hw_init_phase2(adev); 2579 if (r) 2580 goto init_failed; 2581 2582 /* 2583 * retired pages will be loaded from eeprom and reserved here, 2584 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2585 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2586 * for I2C communication which only true at this point. 2587 * 2588 * amdgpu_ras_recovery_init may fail, but the upper only cares the 2589 * failure from bad gpu situation and stop amdgpu init process 2590 * accordingly. For other failed cases, it will still release all 2591 * the resource and print error message, rather than returning one 2592 * negative value to upper level. 2593 * 2594 * Note: theoretically, this should be called before all vram allocations 2595 * to protect retired page from abusing 2596 */ 2597 r = amdgpu_ras_recovery_init(adev); 2598 if (r) 2599 goto init_failed; 2600 2601 /** 2602 * In case of XGMI grab extra reference for reset domain for this device 2603 */ 2604 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2605 if (amdgpu_xgmi_add_device(adev) == 0) { 2606 if (!amdgpu_sriov_vf(adev)) { 2607 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 2608 2609 if (WARN_ON(!hive)) { 2610 r = -ENOENT; 2611 goto init_failed; 2612 } 2613 2614 if (!hive->reset_domain || 2615 !amdgpu_reset_get_reset_domain(hive->reset_domain)) { 2616 r = -ENOENT; 2617 amdgpu_put_xgmi_hive(hive); 2618 goto init_failed; 2619 } 2620 2621 /* Drop the early temporary reset domain we created for device */ 2622 amdgpu_reset_put_reset_domain(adev->reset_domain); 2623 adev->reset_domain = hive->reset_domain; 2624 amdgpu_put_xgmi_hive(hive); 2625 } 2626 } 2627 } 2628 2629 r = amdgpu_device_init_schedulers(adev); 2630 if (r) 2631 goto init_failed; 2632 2633 /* Don't init kfd if whole hive need to be reset during init */ 2634 if (!adev->gmc.xgmi.pending_reset) 2635 amdgpu_amdkfd_device_init(adev); 2636 2637 amdgpu_fru_get_product_info(adev); 2638 2639 init_failed: 2640 2641 return r; 2642 } 2643 2644 /** 2645 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2646 * 2647 * @adev: amdgpu_device pointer 2648 * 2649 * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2650 * this function before a GPU reset. If the value is retained after a 2651 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. 2652 */ 2653 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2654 { 2655 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2656 } 2657 2658 /** 2659 * amdgpu_device_check_vram_lost - check if vram is valid 2660 * 2661 * @adev: amdgpu_device pointer 2662 * 2663 * Checks the reset magic value written to the gart pointer in VRAM. 2664 * The driver calls this after a GPU reset to see if the contents of 2665 * VRAM is lost or now. 2666 * returns true if vram is lost, false if not. 2667 */ 2668 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2669 { 2670 if (memcmp(adev->gart.ptr, adev->reset_magic, 2671 AMDGPU_RESET_MAGIC_NUM)) 2672 return true; 2673 2674 if (!amdgpu_in_reset(adev)) 2675 return false; 2676 2677 /* 2678 * For all ASICs with baco/mode1 reset, the VRAM is 2679 * always assumed to be lost. 2680 */ 2681 switch (amdgpu_asic_reset_method(adev)) { 2682 case AMD_RESET_METHOD_BACO: 2683 case AMD_RESET_METHOD_MODE1: 2684 return true; 2685 default: 2686 return false; 2687 } 2688 } 2689 2690 /** 2691 * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2692 * 2693 * @adev: amdgpu_device pointer 2694 * @state: clockgating state (gate or ungate) 2695 * 2696 * The list of all the hardware IPs that make up the asic is walked and the 2697 * set_clockgating_state callbacks are run. 2698 * Late initialization pass enabling clockgating for hardware IPs. 2699 * Fini or suspend, pass disabling clockgating for hardware IPs. 2700 * Returns 0 on success, negative error code on failure. 2701 */ 2702 2703 int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2704 enum amd_clockgating_state state) 2705 { 2706 int i, j, r; 2707 2708 if (amdgpu_emu_mode == 1) 2709 return 0; 2710 2711 for (j = 0; j < adev->num_ip_blocks; j++) { 2712 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2713 if (!adev->ip_blocks[i].status.late_initialized) 2714 continue; 2715 /* skip CG for GFX, SDMA on S0ix */ 2716 if (adev->in_s0ix && 2717 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2718 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 2719 continue; 2720 /* skip CG for VCE/UVD, it's handled specially */ 2721 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2722 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2723 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2724 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2725 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2726 /* enable clockgating to save power */ 2727 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2728 state); 2729 if (r) { 2730 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2731 adev->ip_blocks[i].version->funcs->name, r); 2732 return r; 2733 } 2734 } 2735 } 2736 2737 return 0; 2738 } 2739 2740 int amdgpu_device_set_pg_state(struct amdgpu_device *adev, 2741 enum amd_powergating_state state) 2742 { 2743 int i, j, r; 2744 2745 if (amdgpu_emu_mode == 1) 2746 return 0; 2747 2748 for (j = 0; j < adev->num_ip_blocks; j++) { 2749 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2750 if (!adev->ip_blocks[i].status.late_initialized) 2751 continue; 2752 /* skip PG for GFX, SDMA on S0ix */ 2753 if (adev->in_s0ix && 2754 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2755 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 2756 continue; 2757 /* skip CG for VCE/UVD, it's handled specially */ 2758 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2759 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2760 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2761 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2762 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2763 /* enable powergating to save power */ 2764 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2765 state); 2766 if (r) { 2767 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2768 adev->ip_blocks[i].version->funcs->name, r); 2769 return r; 2770 } 2771 } 2772 } 2773 return 0; 2774 } 2775 2776 static int amdgpu_device_enable_mgpu_fan_boost(void) 2777 { 2778 struct amdgpu_gpu_instance *gpu_ins; 2779 struct amdgpu_device *adev; 2780 int i, ret = 0; 2781 2782 mutex_lock(&mgpu_info.mutex); 2783 2784 /* 2785 * MGPU fan boost feature should be enabled 2786 * only when there are two or more dGPUs in 2787 * the system 2788 */ 2789 if (mgpu_info.num_dgpu < 2) 2790 goto out; 2791 2792 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2793 gpu_ins = &(mgpu_info.gpu_ins[i]); 2794 adev = gpu_ins->adev; 2795 if (!(adev->flags & AMD_IS_APU) && 2796 !gpu_ins->mgpu_fan_enabled) { 2797 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2798 if (ret) 2799 break; 2800 2801 gpu_ins->mgpu_fan_enabled = 1; 2802 } 2803 } 2804 2805 out: 2806 mutex_unlock(&mgpu_info.mutex); 2807 2808 return ret; 2809 } 2810 2811 /** 2812 * amdgpu_device_ip_late_init - run late init for hardware IPs 2813 * 2814 * @adev: amdgpu_device pointer 2815 * 2816 * Late initialization pass for hardware IPs. The list of all the hardware 2817 * IPs that make up the asic is walked and the late_init callbacks are run. 2818 * late_init covers any special initialization that an IP requires 2819 * after all of the have been initialized or something that needs to happen 2820 * late in the init process. 2821 * Returns 0 on success, negative error code on failure. 2822 */ 2823 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2824 { 2825 struct amdgpu_gpu_instance *gpu_instance; 2826 int i = 0, r; 2827 2828 for (i = 0; i < adev->num_ip_blocks; i++) { 2829 if (!adev->ip_blocks[i].status.hw) 2830 continue; 2831 if (adev->ip_blocks[i].version->funcs->late_init) { 2832 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2833 if (r) { 2834 DRM_ERROR("late_init of IP block <%s> failed %d\n", 2835 adev->ip_blocks[i].version->funcs->name, r); 2836 return r; 2837 } 2838 } 2839 adev->ip_blocks[i].status.late_initialized = true; 2840 } 2841 2842 r = amdgpu_ras_late_init(adev); 2843 if (r) { 2844 DRM_ERROR("amdgpu_ras_late_init failed %d", r); 2845 return r; 2846 } 2847 2848 amdgpu_ras_set_error_query_ready(adev, true); 2849 2850 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2851 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2852 2853 amdgpu_device_fill_reset_magic(adev); 2854 2855 r = amdgpu_device_enable_mgpu_fan_boost(); 2856 if (r) 2857 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2858 2859 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ 2860 if (amdgpu_passthrough(adev) && 2861 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) || 2862 adev->asic_type == CHIP_ALDEBARAN)) 2863 amdgpu_dpm_handle_passthrough_sbr(adev, true); 2864 2865 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2866 mutex_lock(&mgpu_info.mutex); 2867 2868 /* 2869 * Reset device p-state to low as this was booted with high. 2870 * 2871 * This should be performed only after all devices from the same 2872 * hive get initialized. 2873 * 2874 * However, it's unknown how many device in the hive in advance. 2875 * As this is counted one by one during devices initializations. 2876 * 2877 * So, we wait for all XGMI interlinked devices initialized. 2878 * This may bring some delays as those devices may come from 2879 * different hives. But that should be OK. 2880 */ 2881 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2882 for (i = 0; i < mgpu_info.num_gpu; i++) { 2883 gpu_instance = &(mgpu_info.gpu_ins[i]); 2884 if (gpu_instance->adev->flags & AMD_IS_APU) 2885 continue; 2886 2887 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2888 AMDGPU_XGMI_PSTATE_MIN); 2889 if (r) { 2890 DRM_ERROR("pstate setting failed (%d).\n", r); 2891 break; 2892 } 2893 } 2894 } 2895 2896 mutex_unlock(&mgpu_info.mutex); 2897 } 2898 2899 return 0; 2900 } 2901 2902 /** 2903 * amdgpu_device_smu_fini_early - smu hw_fini wrapper 2904 * 2905 * @adev: amdgpu_device pointer 2906 * 2907 * For ASICs need to disable SMC first 2908 */ 2909 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) 2910 { 2911 int i, r; 2912 2913 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) 2914 return; 2915 2916 for (i = 0; i < adev->num_ip_blocks; i++) { 2917 if (!adev->ip_blocks[i].status.hw) 2918 continue; 2919 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2920 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2921 /* XXX handle errors */ 2922 if (r) { 2923 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2924 adev->ip_blocks[i].version->funcs->name, r); 2925 } 2926 adev->ip_blocks[i].status.hw = false; 2927 break; 2928 } 2929 } 2930 } 2931 2932 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) 2933 { 2934 int i, r; 2935 2936 for (i = 0; i < adev->num_ip_blocks; i++) { 2937 if (!adev->ip_blocks[i].version->funcs->early_fini) 2938 continue; 2939 2940 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); 2941 if (r) { 2942 DRM_DEBUG("early_fini of IP block <%s> failed %d\n", 2943 adev->ip_blocks[i].version->funcs->name, r); 2944 } 2945 } 2946 2947 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2948 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2949 2950 amdgpu_amdkfd_suspend(adev, false); 2951 2952 /* Workaroud for ASICs need to disable SMC first */ 2953 amdgpu_device_smu_fini_early(adev); 2954 2955 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2956 if (!adev->ip_blocks[i].status.hw) 2957 continue; 2958 2959 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2960 /* XXX handle errors */ 2961 if (r) { 2962 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2963 adev->ip_blocks[i].version->funcs->name, r); 2964 } 2965 2966 adev->ip_blocks[i].status.hw = false; 2967 } 2968 2969 if (amdgpu_sriov_vf(adev)) { 2970 if (amdgpu_virt_release_full_gpu(adev, false)) 2971 DRM_ERROR("failed to release exclusive mode on fini\n"); 2972 } 2973 2974 return 0; 2975 } 2976 2977 /** 2978 * amdgpu_device_ip_fini - run fini for hardware IPs 2979 * 2980 * @adev: amdgpu_device pointer 2981 * 2982 * Main teardown pass for hardware IPs. The list of all the hardware 2983 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2984 * are run. hw_fini tears down the hardware associated with each IP 2985 * and sw_fini tears down any software state associated with each IP. 2986 * Returns 0 on success, negative error code on failure. 2987 */ 2988 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2989 { 2990 int i, r; 2991 2992 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) 2993 amdgpu_virt_release_ras_err_handler_data(adev); 2994 2995 if (adev->gmc.xgmi.num_physical_nodes > 1) 2996 amdgpu_xgmi_remove_device(adev); 2997 2998 amdgpu_amdkfd_device_fini_sw(adev); 2999 3000 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 3001 if (!adev->ip_blocks[i].status.sw) 3002 continue; 3003 3004 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 3005 amdgpu_ucode_free_bo(adev); 3006 amdgpu_free_static_csa(&adev->virt.csa_obj); 3007 amdgpu_device_wb_fini(adev); 3008 amdgpu_device_mem_scratch_fini(adev); 3009 amdgpu_ib_pool_fini(adev); 3010 } 3011 3012 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 3013 /* XXX handle errors */ 3014 if (r) { 3015 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 3016 adev->ip_blocks[i].version->funcs->name, r); 3017 } 3018 adev->ip_blocks[i].status.sw = false; 3019 adev->ip_blocks[i].status.valid = false; 3020 } 3021 3022 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 3023 if (!adev->ip_blocks[i].status.late_initialized) 3024 continue; 3025 if (adev->ip_blocks[i].version->funcs->late_fini) 3026 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 3027 adev->ip_blocks[i].status.late_initialized = false; 3028 } 3029 3030 amdgpu_ras_fini(adev); 3031 3032 return 0; 3033 } 3034 3035 /** 3036 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 3037 * 3038 * @work: work_struct. 3039 */ 3040 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 3041 { 3042 struct amdgpu_device *adev = 3043 container_of(work, struct amdgpu_device, delayed_init_work.work); 3044 int r; 3045 3046 r = amdgpu_ib_ring_tests(adev); 3047 if (r) 3048 DRM_ERROR("ib ring test failed (%d).\n", r); 3049 } 3050 3051 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 3052 { 3053 struct amdgpu_device *adev = 3054 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 3055 3056 WARN_ON_ONCE(adev->gfx.gfx_off_state); 3057 WARN_ON_ONCE(adev->gfx.gfx_off_req_count); 3058 3059 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 3060 adev->gfx.gfx_off_state = true; 3061 } 3062 3063 /** 3064 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 3065 * 3066 * @adev: amdgpu_device pointer 3067 * 3068 * Main suspend function for hardware IPs. The list of all the hardware 3069 * IPs that make up the asic is walked, clockgating is disabled and the 3070 * suspend callbacks are run. suspend puts the hardware and software state 3071 * in each IP into a state suitable for suspend. 3072 * Returns 0 on success, negative error code on failure. 3073 */ 3074 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 3075 { 3076 int i, r; 3077 3078 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 3079 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 3080 3081 /* 3082 * Per PMFW team's suggestion, driver needs to handle gfxoff 3083 * and df cstate features disablement for gpu reset(e.g. Mode1Reset) 3084 * scenario. Add the missing df cstate disablement here. 3085 */ 3086 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) 3087 dev_warn(adev->dev, "Failed to disallow df cstate"); 3088 3089 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 3090 if (!adev->ip_blocks[i].status.valid) 3091 continue; 3092 3093 /* displays are handled separately */ 3094 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) 3095 continue; 3096 3097 /* XXX handle errors */ 3098 r = adev->ip_blocks[i].version->funcs->suspend(adev); 3099 /* XXX handle errors */ 3100 if (r) { 3101 DRM_ERROR("suspend of IP block <%s> failed %d\n", 3102 adev->ip_blocks[i].version->funcs->name, r); 3103 return r; 3104 } 3105 3106 adev->ip_blocks[i].status.hw = false; 3107 } 3108 3109 return 0; 3110 } 3111 3112 /** 3113 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 3114 * 3115 * @adev: amdgpu_device pointer 3116 * 3117 * Main suspend function for hardware IPs. The list of all the hardware 3118 * IPs that make up the asic is walked, clockgating is disabled and the 3119 * suspend callbacks are run. suspend puts the hardware and software state 3120 * in each IP into a state suitable for suspend. 3121 * Returns 0 on success, negative error code on failure. 3122 */ 3123 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 3124 { 3125 int i, r; 3126 3127 if (adev->in_s0ix) 3128 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); 3129 3130 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 3131 if (!adev->ip_blocks[i].status.valid) 3132 continue; 3133 /* displays are handled in phase1 */ 3134 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 3135 continue; 3136 /* PSP lost connection when err_event_athub occurs */ 3137 if (amdgpu_ras_intr_triggered() && 3138 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3139 adev->ip_blocks[i].status.hw = false; 3140 continue; 3141 } 3142 3143 /* skip unnecessary suspend if we do not initialize them yet */ 3144 if (adev->gmc.xgmi.pending_reset && 3145 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3146 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || 3147 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3148 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { 3149 adev->ip_blocks[i].status.hw = false; 3150 continue; 3151 } 3152 3153 /* skip suspend of gfx/mes and psp for S0ix 3154 * gfx is in gfxoff state, so on resume it will exit gfxoff just 3155 * like at runtime. PSP is also part of the always on hardware 3156 * so no need to suspend it. 3157 */ 3158 if (adev->in_s0ix && 3159 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || 3160 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 3161 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) 3162 continue; 3163 3164 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ 3165 if (adev->in_s0ix && 3166 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) && 3167 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 3168 continue; 3169 3170 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot. 3171 * These are in TMR, hence are expected to be reused by PSP-TOS to reload 3172 * from this location and RLC Autoload automatically also gets loaded 3173 * from here based on PMFW -> PSP message during re-init sequence. 3174 * Therefore, the psp suspend & resume should be skipped to avoid destroy 3175 * the TMR and reload FWs again for IMU enabled APU ASICs. 3176 */ 3177 if (amdgpu_in_reset(adev) && 3178 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs && 3179 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3180 continue; 3181 3182 /* XXX handle errors */ 3183 r = adev->ip_blocks[i].version->funcs->suspend(adev); 3184 /* XXX handle errors */ 3185 if (r) { 3186 DRM_ERROR("suspend of IP block <%s> failed %d\n", 3187 adev->ip_blocks[i].version->funcs->name, r); 3188 } 3189 adev->ip_blocks[i].status.hw = false; 3190 /* handle putting the SMC in the appropriate state */ 3191 if (!amdgpu_sriov_vf(adev)) { 3192 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 3193 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 3194 if (r) { 3195 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 3196 adev->mp1_state, r); 3197 return r; 3198 } 3199 } 3200 } 3201 } 3202 3203 return 0; 3204 } 3205 3206 /** 3207 * amdgpu_device_ip_suspend - run suspend for hardware IPs 3208 * 3209 * @adev: amdgpu_device pointer 3210 * 3211 * Main suspend function for hardware IPs. The list of all the hardware 3212 * IPs that make up the asic is walked, clockgating is disabled and the 3213 * suspend callbacks are run. suspend puts the hardware and software state 3214 * in each IP into a state suitable for suspend. 3215 * Returns 0 on success, negative error code on failure. 3216 */ 3217 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 3218 { 3219 int r; 3220 3221 if (amdgpu_sriov_vf(adev)) { 3222 amdgpu_virt_fini_data_exchange(adev); 3223 amdgpu_virt_request_full_gpu(adev, false); 3224 } 3225 3226 r = amdgpu_device_ip_suspend_phase1(adev); 3227 if (r) 3228 return r; 3229 r = amdgpu_device_ip_suspend_phase2(adev); 3230 3231 if (amdgpu_sriov_vf(adev)) 3232 amdgpu_virt_release_full_gpu(adev, false); 3233 3234 return r; 3235 } 3236 3237 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 3238 { 3239 int i, r; 3240 3241 static enum amd_ip_block_type ip_order[] = { 3242 AMD_IP_BLOCK_TYPE_COMMON, 3243 AMD_IP_BLOCK_TYPE_GMC, 3244 AMD_IP_BLOCK_TYPE_PSP, 3245 AMD_IP_BLOCK_TYPE_IH, 3246 }; 3247 3248 for (i = 0; i < adev->num_ip_blocks; i++) { 3249 int j; 3250 struct amdgpu_ip_block *block; 3251 3252 block = &adev->ip_blocks[i]; 3253 block->status.hw = false; 3254 3255 for (j = 0; j < ARRAY_SIZE(ip_order); j++) { 3256 3257 if (block->version->type != ip_order[j] || 3258 !block->status.valid) 3259 continue; 3260 3261 r = block->version->funcs->hw_init(adev); 3262 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3263 if (r) 3264 return r; 3265 block->status.hw = true; 3266 } 3267 } 3268 3269 return 0; 3270 } 3271 3272 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 3273 { 3274 int i, r; 3275 3276 static enum amd_ip_block_type ip_order[] = { 3277 AMD_IP_BLOCK_TYPE_SMC, 3278 AMD_IP_BLOCK_TYPE_DCE, 3279 AMD_IP_BLOCK_TYPE_GFX, 3280 AMD_IP_BLOCK_TYPE_SDMA, 3281 AMD_IP_BLOCK_TYPE_MES, 3282 AMD_IP_BLOCK_TYPE_UVD, 3283 AMD_IP_BLOCK_TYPE_VCE, 3284 AMD_IP_BLOCK_TYPE_VCN, 3285 AMD_IP_BLOCK_TYPE_JPEG 3286 }; 3287 3288 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 3289 int j; 3290 struct amdgpu_ip_block *block; 3291 3292 for (j = 0; j < adev->num_ip_blocks; j++) { 3293 block = &adev->ip_blocks[j]; 3294 3295 if (block->version->type != ip_order[i] || 3296 !block->status.valid || 3297 block->status.hw) 3298 continue; 3299 3300 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 3301 r = block->version->funcs->resume(adev); 3302 else 3303 r = block->version->funcs->hw_init(adev); 3304 3305 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3306 if (r) 3307 return r; 3308 block->status.hw = true; 3309 } 3310 } 3311 3312 return 0; 3313 } 3314 3315 /** 3316 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 3317 * 3318 * @adev: amdgpu_device pointer 3319 * 3320 * First resume function for hardware IPs. The list of all the hardware 3321 * IPs that make up the asic is walked and the resume callbacks are run for 3322 * COMMON, GMC, and IH. resume puts the hardware into a functional state 3323 * after a suspend and updates the software state as necessary. This 3324 * function is also used for restoring the GPU after a GPU reset. 3325 * Returns 0 on success, negative error code on failure. 3326 */ 3327 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 3328 { 3329 int i, r; 3330 3331 for (i = 0; i < adev->num_ip_blocks; i++) { 3332 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3333 continue; 3334 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3335 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3336 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3337 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { 3338 3339 r = adev->ip_blocks[i].version->funcs->resume(adev); 3340 if (r) { 3341 DRM_ERROR("resume of IP block <%s> failed %d\n", 3342 adev->ip_blocks[i].version->funcs->name, r); 3343 return r; 3344 } 3345 adev->ip_blocks[i].status.hw = true; 3346 } 3347 } 3348 3349 return 0; 3350 } 3351 3352 /** 3353 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 3354 * 3355 * @adev: amdgpu_device pointer 3356 * 3357 * First resume function for hardware IPs. The list of all the hardware 3358 * IPs that make up the asic is walked and the resume callbacks are run for 3359 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 3360 * functional state after a suspend and updates the software state as 3361 * necessary. This function is also used for restoring the GPU after a GPU 3362 * reset. 3363 * Returns 0 on success, negative error code on failure. 3364 */ 3365 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 3366 { 3367 int i, r; 3368 3369 for (i = 0; i < adev->num_ip_blocks; i++) { 3370 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3371 continue; 3372 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3373 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3374 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3375 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3376 continue; 3377 r = adev->ip_blocks[i].version->funcs->resume(adev); 3378 if (r) { 3379 DRM_ERROR("resume of IP block <%s> failed %d\n", 3380 adev->ip_blocks[i].version->funcs->name, r); 3381 return r; 3382 } 3383 adev->ip_blocks[i].status.hw = true; 3384 } 3385 3386 return 0; 3387 } 3388 3389 /** 3390 * amdgpu_device_ip_resume - run resume for hardware IPs 3391 * 3392 * @adev: amdgpu_device pointer 3393 * 3394 * Main resume function for hardware IPs. The hardware IPs 3395 * are split into two resume functions because they are 3396 * are also used in in recovering from a GPU reset and some additional 3397 * steps need to be take between them. In this case (S3/S4) they are 3398 * run sequentially. 3399 * Returns 0 on success, negative error code on failure. 3400 */ 3401 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 3402 { 3403 int r; 3404 3405 if (!adev->in_s0ix) { 3406 r = amdgpu_amdkfd_resume_iommu(adev); 3407 if (r) 3408 return r; 3409 } 3410 3411 r = amdgpu_device_ip_resume_phase1(adev); 3412 if (r) 3413 return r; 3414 3415 r = amdgpu_device_fw_loading(adev); 3416 if (r) 3417 return r; 3418 3419 r = amdgpu_device_ip_resume_phase2(adev); 3420 3421 return r; 3422 } 3423 3424 /** 3425 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 3426 * 3427 * @adev: amdgpu_device pointer 3428 * 3429 * Query the VBIOS data tables to determine if the board supports SR-IOV. 3430 */ 3431 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 3432 { 3433 if (amdgpu_sriov_vf(adev)) { 3434 if (adev->is_atom_fw) { 3435 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) 3436 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3437 } else { 3438 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 3439 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3440 } 3441 3442 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 3443 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 3444 } 3445 } 3446 3447 /** 3448 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 3449 * 3450 * @asic_type: AMD asic type 3451 * 3452 * Check if there is DC (new modesetting infrastructre) support for an asic. 3453 * returns true if DC has support, false if not. 3454 */ 3455 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 3456 { 3457 switch (asic_type) { 3458 #ifdef CONFIG_DRM_AMDGPU_SI 3459 case CHIP_HAINAN: 3460 #endif 3461 case CHIP_TOPAZ: 3462 /* chips with no display hardware */ 3463 return false; 3464 #if defined(CONFIG_DRM_AMD_DC) 3465 case CHIP_TAHITI: 3466 case CHIP_PITCAIRN: 3467 case CHIP_VERDE: 3468 case CHIP_OLAND: 3469 /* 3470 * We have systems in the wild with these ASICs that require 3471 * LVDS and VGA support which is not supported with DC. 3472 * 3473 * Fallback to the non-DC driver here by default so as not to 3474 * cause regressions. 3475 */ 3476 #if defined(CONFIG_DRM_AMD_DC_SI) 3477 return amdgpu_dc > 0; 3478 #else 3479 return false; 3480 #endif 3481 case CHIP_BONAIRE: 3482 case CHIP_KAVERI: 3483 case CHIP_KABINI: 3484 case CHIP_MULLINS: 3485 /* 3486 * We have systems in the wild with these ASICs that require 3487 * VGA support which is not supported with DC. 3488 * 3489 * Fallback to the non-DC driver here by default so as not to 3490 * cause regressions. 3491 */ 3492 return amdgpu_dc > 0; 3493 default: 3494 return amdgpu_dc != 0; 3495 #else 3496 default: 3497 if (amdgpu_dc > 0) 3498 DRM_INFO_ONCE("Display Core has been requested via kernel parameter " 3499 "but isn't supported by ASIC, ignoring\n"); 3500 return false; 3501 #endif 3502 } 3503 } 3504 3505 /** 3506 * amdgpu_device_has_dc_support - check if dc is supported 3507 * 3508 * @adev: amdgpu_device pointer 3509 * 3510 * Returns true for supported, false for not supported 3511 */ 3512 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 3513 { 3514 if (adev->enable_virtual_display || 3515 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) 3516 return false; 3517 3518 return amdgpu_device_asic_has_dc_support(adev->asic_type); 3519 } 3520 3521 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 3522 { 3523 struct amdgpu_device *adev = 3524 container_of(__work, struct amdgpu_device, xgmi_reset_work); 3525 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 3526 3527 /* It's a bug to not have a hive within this function */ 3528 if (WARN_ON(!hive)) 3529 return; 3530 3531 /* 3532 * Use task barrier to synchronize all xgmi reset works across the 3533 * hive. task_barrier_enter and task_barrier_exit will block 3534 * until all the threads running the xgmi reset works reach 3535 * those points. task_barrier_full will do both blocks. 3536 */ 3537 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 3538 3539 task_barrier_enter(&hive->tb); 3540 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); 3541 3542 if (adev->asic_reset_res) 3543 goto fail; 3544 3545 task_barrier_exit(&hive->tb); 3546 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); 3547 3548 if (adev->asic_reset_res) 3549 goto fail; 3550 3551 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && 3552 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 3553 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); 3554 } else { 3555 3556 task_barrier_full(&hive->tb); 3557 adev->asic_reset_res = amdgpu_asic_reset(adev); 3558 } 3559 3560 fail: 3561 if (adev->asic_reset_res) 3562 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 3563 adev->asic_reset_res, adev_to_drm(adev)->unique); 3564 amdgpu_put_xgmi_hive(hive); 3565 } 3566 3567 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 3568 { 3569 char *input = amdgpu_lockup_timeout; 3570 char *timeout_setting = NULL; 3571 int index = 0; 3572 long timeout; 3573 int ret = 0; 3574 3575 /* 3576 * By default timeout for non compute jobs is 10000 3577 * and 60000 for compute jobs. 3578 * In SR-IOV or passthrough mode, timeout for compute 3579 * jobs are 60000 by default. 3580 */ 3581 adev->gfx_timeout = msecs_to_jiffies(10000); 3582 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3583 if (amdgpu_sriov_vf(adev)) 3584 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? 3585 msecs_to_jiffies(60000) : msecs_to_jiffies(10000); 3586 else 3587 adev->compute_timeout = msecs_to_jiffies(60000); 3588 3589 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3590 while ((timeout_setting = strsep(&input, ",")) && 3591 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3592 ret = kstrtol(timeout_setting, 0, &timeout); 3593 if (ret) 3594 return ret; 3595 3596 if (timeout == 0) { 3597 index++; 3598 continue; 3599 } else if (timeout < 0) { 3600 timeout = MAX_SCHEDULE_TIMEOUT; 3601 dev_warn(adev->dev, "lockup timeout disabled"); 3602 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); 3603 } else { 3604 timeout = msecs_to_jiffies(timeout); 3605 } 3606 3607 switch (index++) { 3608 case 0: 3609 adev->gfx_timeout = timeout; 3610 break; 3611 case 1: 3612 adev->compute_timeout = timeout; 3613 break; 3614 case 2: 3615 adev->sdma_timeout = timeout; 3616 break; 3617 case 3: 3618 adev->video_timeout = timeout; 3619 break; 3620 default: 3621 break; 3622 } 3623 } 3624 /* 3625 * There is only one value specified and 3626 * it should apply to all non-compute jobs. 3627 */ 3628 if (index == 1) { 3629 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3630 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 3631 adev->compute_timeout = adev->gfx_timeout; 3632 } 3633 } 3634 3635 return ret; 3636 } 3637 3638 /** 3639 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU 3640 * 3641 * @adev: amdgpu_device pointer 3642 * 3643 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode 3644 */ 3645 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) 3646 { 3647 struct iommu_domain *domain; 3648 3649 domain = iommu_get_domain_for_dev(adev->dev); 3650 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) 3651 adev->ram_is_direct_mapped = true; 3652 } 3653 3654 static const struct attribute *amdgpu_dev_attributes[] = { 3655 &dev_attr_product_name.attr, 3656 &dev_attr_product_number.attr, 3657 &dev_attr_serial_number.attr, 3658 &dev_attr_pcie_replay_count.attr, 3659 NULL 3660 }; 3661 3662 /** 3663 * amdgpu_device_init - initialize the driver 3664 * 3665 * @adev: amdgpu_device pointer 3666 * @flags: driver flags 3667 * 3668 * Initializes the driver info and hw (all asics). 3669 * Returns 0 for success or an error on failure. 3670 * Called at driver startup. 3671 */ 3672 int amdgpu_device_init(struct amdgpu_device *adev, 3673 uint32_t flags) 3674 { 3675 struct drm_device *ddev = adev_to_drm(adev); 3676 struct pci_dev *pdev = adev->pdev; 3677 int r, i; 3678 bool px = false; 3679 u32 max_MBps; 3680 int tmp; 3681 3682 adev->shutdown = false; 3683 adev->flags = flags; 3684 3685 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 3686 adev->asic_type = amdgpu_force_asic_type; 3687 else 3688 adev->asic_type = flags & AMD_ASIC_MASK; 3689 3690 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 3691 if (amdgpu_emu_mode == 1) 3692 adev->usec_timeout *= 10; 3693 adev->gmc.gart_size = 512 * 1024 * 1024; 3694 adev->accel_working = false; 3695 adev->num_rings = 0; 3696 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); 3697 adev->mman.buffer_funcs = NULL; 3698 adev->mman.buffer_funcs_ring = NULL; 3699 adev->vm_manager.vm_pte_funcs = NULL; 3700 adev->vm_manager.vm_pte_num_scheds = 0; 3701 adev->gmc.gmc_funcs = NULL; 3702 adev->harvest_ip_mask = 0x0; 3703 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3704 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 3705 3706 adev->smc_rreg = &amdgpu_invalid_rreg; 3707 adev->smc_wreg = &amdgpu_invalid_wreg; 3708 adev->pcie_rreg = &amdgpu_invalid_rreg; 3709 adev->pcie_wreg = &amdgpu_invalid_wreg; 3710 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; 3711 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; 3712 adev->pciep_rreg = &amdgpu_invalid_rreg; 3713 adev->pciep_wreg = &amdgpu_invalid_wreg; 3714 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 3715 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 3716 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 3717 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 3718 adev->didt_rreg = &amdgpu_invalid_rreg; 3719 adev->didt_wreg = &amdgpu_invalid_wreg; 3720 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 3721 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 3722 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 3723 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 3724 3725 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 3726 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 3727 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 3728 3729 /* mutex initialization are all done here so we 3730 * can recall function without having locking issues */ 3731 mutex_init(&adev->firmware.mutex); 3732 mutex_init(&adev->pm.mutex); 3733 mutex_init(&adev->gfx.gpu_clock_mutex); 3734 mutex_init(&adev->srbm_mutex); 3735 mutex_init(&adev->gfx.pipe_reserve_mutex); 3736 mutex_init(&adev->gfx.gfx_off_mutex); 3737 mutex_init(&adev->gfx.partition_mutex); 3738 mutex_init(&adev->grbm_idx_mutex); 3739 mutex_init(&adev->mn_lock); 3740 mutex_init(&adev->virt.vf_errors.lock); 3741 hash_init(adev->mn_hash); 3742 mutex_init(&adev->psp.mutex); 3743 mutex_init(&adev->notifier_lock); 3744 mutex_init(&adev->pm.stable_pstate_ctx_lock); 3745 mutex_init(&adev->benchmark_mutex); 3746 3747 amdgpu_device_init_apu_flags(adev); 3748 3749 r = amdgpu_device_check_arguments(adev); 3750 if (r) 3751 return r; 3752 3753 spin_lock_init(&adev->mmio_idx_lock); 3754 spin_lock_init(&adev->smc_idx_lock); 3755 spin_lock_init(&adev->pcie_idx_lock); 3756 spin_lock_init(&adev->uvd_ctx_idx_lock); 3757 spin_lock_init(&adev->didt_idx_lock); 3758 spin_lock_init(&adev->gc_cac_idx_lock); 3759 spin_lock_init(&adev->se_cac_idx_lock); 3760 spin_lock_init(&adev->audio_endpt_idx_lock); 3761 spin_lock_init(&adev->mm_stats.lock); 3762 3763 INIT_LIST_HEAD(&adev->shadow_list); 3764 mutex_init(&adev->shadow_list_lock); 3765 3766 INIT_LIST_HEAD(&adev->reset_list); 3767 3768 INIT_LIST_HEAD(&adev->ras_list); 3769 3770 INIT_DELAYED_WORK(&adev->delayed_init_work, 3771 amdgpu_device_delayed_init_work_handler); 3772 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3773 amdgpu_device_delay_enable_gfx_off); 3774 3775 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3776 3777 adev->gfx.gfx_off_req_count = 1; 3778 adev->gfx.gfx_off_residency = 0; 3779 adev->gfx.gfx_off_entrycount = 0; 3780 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3781 3782 atomic_set(&adev->throttling_logging_enabled, 1); 3783 /* 3784 * If throttling continues, logging will be performed every minute 3785 * to avoid log flooding. "-1" is subtracted since the thermal 3786 * throttling interrupt comes every second. Thus, the total logging 3787 * interval is 59 seconds(retelimited printk interval) + 1(waiting 3788 * for throttling interrupt) = 60 seconds. 3789 */ 3790 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); 3791 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); 3792 3793 /* Registers mapping */ 3794 /* TODO: block userspace mapping of io register */ 3795 if (adev->asic_type >= CHIP_BONAIRE) { 3796 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3797 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3798 } else { 3799 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3800 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3801 } 3802 3803 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++) 3804 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN); 3805 3806 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3807 if (adev->rmmio == NULL) { 3808 return -ENOMEM; 3809 } 3810 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3811 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3812 3813 amdgpu_device_get_pcie_info(adev); 3814 3815 if (amdgpu_mcbp) 3816 DRM_INFO("MCBP is enabled\n"); 3817 3818 /* 3819 * Reset domain needs to be present early, before XGMI hive discovered 3820 * (if any) and intitialized to use reset sem and in_gpu reset flag 3821 * early on during init and before calling to RREG32. 3822 */ 3823 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); 3824 if (!adev->reset_domain) 3825 return -ENOMEM; 3826 3827 /* detect hw virtualization here */ 3828 amdgpu_detect_virtualization(adev); 3829 3830 r = amdgpu_device_get_job_timeout_settings(adev); 3831 if (r) { 3832 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3833 return r; 3834 } 3835 3836 /* early init functions */ 3837 r = amdgpu_device_ip_early_init(adev); 3838 if (r) 3839 return r; 3840 3841 /* Get rid of things like offb */ 3842 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); 3843 if (r) 3844 return r; 3845 3846 /* Enable TMZ based on IP_VERSION */ 3847 amdgpu_gmc_tmz_set(adev); 3848 3849 amdgpu_gmc_noretry_set(adev); 3850 /* Need to get xgmi info early to decide the reset behavior*/ 3851 if (adev->gmc.xgmi.supported) { 3852 r = adev->gfxhub.funcs->get_xgmi_info(adev); 3853 if (r) 3854 return r; 3855 } 3856 3857 /* enable PCIE atomic ops */ 3858 if (amdgpu_sriov_vf(adev)) 3859 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) 3860 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == 3861 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3862 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a 3863 * internal path natively support atomics, set have_atomics_support to true. 3864 */ 3865 else if ((adev->flags & AMD_IS_APU) && 3866 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) 3867 adev->have_atomics_support = true; 3868 else 3869 adev->have_atomics_support = 3870 !pci_enable_atomic_ops_to_root(adev->pdev, 3871 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3872 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3873 if (!adev->have_atomics_support) 3874 dev_info(adev->dev, "PCIE atomic ops is not supported\n"); 3875 3876 /* doorbell bar mapping and doorbell index init*/ 3877 amdgpu_device_doorbell_init(adev); 3878 3879 if (amdgpu_emu_mode == 1) { 3880 /* post the asic on emulation mode */ 3881 emu_soc_asic_init(adev); 3882 goto fence_driver_init; 3883 } 3884 3885 amdgpu_reset_init(adev); 3886 3887 /* detect if we are with an SRIOV vbios */ 3888 amdgpu_device_detect_sriov_bios(adev); 3889 3890 /* check if we need to reset the asic 3891 * E.g., driver was not cleanly unloaded previously, etc. 3892 */ 3893 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3894 if (adev->gmc.xgmi.num_physical_nodes) { 3895 dev_info(adev->dev, "Pending hive reset.\n"); 3896 adev->gmc.xgmi.pending_reset = true; 3897 /* Only need to init necessary block for SMU to handle the reset */ 3898 for (i = 0; i < adev->num_ip_blocks; i++) { 3899 if (!adev->ip_blocks[i].status.valid) 3900 continue; 3901 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3902 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3903 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3904 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { 3905 DRM_DEBUG("IP %s disabled for hw_init.\n", 3906 adev->ip_blocks[i].version->funcs->name); 3907 adev->ip_blocks[i].status.hw = true; 3908 } 3909 } 3910 } else { 3911 tmp = amdgpu_reset_method; 3912 /* It should do a default reset when loading or reloading the driver, 3913 * regardless of the module parameter reset_method. 3914 */ 3915 amdgpu_reset_method = AMD_RESET_METHOD_NONE; 3916 r = amdgpu_asic_reset(adev); 3917 amdgpu_reset_method = tmp; 3918 if (r) { 3919 dev_err(adev->dev, "asic reset on init failed\n"); 3920 goto failed; 3921 } 3922 } 3923 } 3924 3925 /* Post card if necessary */ 3926 if (amdgpu_device_need_post(adev)) { 3927 if (!adev->bios) { 3928 dev_err(adev->dev, "no vBIOS found\n"); 3929 r = -EINVAL; 3930 goto failed; 3931 } 3932 DRM_INFO("GPU posting now...\n"); 3933 r = amdgpu_device_asic_init(adev); 3934 if (r) { 3935 dev_err(adev->dev, "gpu post error!\n"); 3936 goto failed; 3937 } 3938 } 3939 3940 if (adev->is_atom_fw) { 3941 /* Initialize clocks */ 3942 r = amdgpu_atomfirmware_get_clock_info(adev); 3943 if (r) { 3944 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3945 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3946 goto failed; 3947 } 3948 } else { 3949 /* Initialize clocks */ 3950 r = amdgpu_atombios_get_clock_info(adev); 3951 if (r) { 3952 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3953 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3954 goto failed; 3955 } 3956 /* init i2c buses */ 3957 if (!amdgpu_device_has_dc_support(adev)) 3958 amdgpu_atombios_i2c_init(adev); 3959 } 3960 3961 fence_driver_init: 3962 /* Fence driver */ 3963 r = amdgpu_fence_driver_sw_init(adev); 3964 if (r) { 3965 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); 3966 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3967 goto failed; 3968 } 3969 3970 /* init the mode config */ 3971 drm_mode_config_init(adev_to_drm(adev)); 3972 3973 r = amdgpu_device_ip_init(adev); 3974 if (r) { 3975 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3976 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3977 goto release_ras_con; 3978 } 3979 3980 amdgpu_fence_driver_hw_init(adev); 3981 3982 dev_info(adev->dev, 3983 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3984 adev->gfx.config.max_shader_engines, 3985 adev->gfx.config.max_sh_per_se, 3986 adev->gfx.config.max_cu_per_sh, 3987 adev->gfx.cu_info.number); 3988 3989 adev->accel_working = true; 3990 3991 amdgpu_vm_check_compute_bug(adev); 3992 3993 /* Initialize the buffer migration limit. */ 3994 if (amdgpu_moverate >= 0) 3995 max_MBps = amdgpu_moverate; 3996 else 3997 max_MBps = 8; /* Allow 8 MB/s. */ 3998 /* Get a log2 for easy divisions. */ 3999 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 4000 4001 r = amdgpu_pm_sysfs_init(adev); 4002 if (r) 4003 DRM_ERROR("registering pm sysfs failed (%d).\n", r); 4004 4005 r = amdgpu_ucode_sysfs_init(adev); 4006 if (r) { 4007 adev->ucode_sysfs_en = false; 4008 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 4009 } else 4010 adev->ucode_sysfs_en = true; 4011 4012 r = amdgpu_psp_sysfs_init(adev); 4013 if (r) { 4014 adev->psp_sysfs_en = false; 4015 if (!amdgpu_sriov_vf(adev)) 4016 DRM_ERROR("Creating psp sysfs failed\n"); 4017 } else 4018 adev->psp_sysfs_en = true; 4019 4020 /* 4021 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 4022 * Otherwise the mgpu fan boost feature will be skipped due to the 4023 * gpu instance is counted less. 4024 */ 4025 amdgpu_register_gpu_instance(adev); 4026 4027 /* enable clockgating, etc. after ib tests, etc. since some blocks require 4028 * explicit gating rather than handling it automatically. 4029 */ 4030 if (!adev->gmc.xgmi.pending_reset) { 4031 r = amdgpu_device_ip_late_init(adev); 4032 if (r) { 4033 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 4034 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 4035 goto release_ras_con; 4036 } 4037 /* must succeed. */ 4038 amdgpu_ras_resume(adev); 4039 queue_delayed_work(system_wq, &adev->delayed_init_work, 4040 msecs_to_jiffies(AMDGPU_RESUME_MS)); 4041 } 4042 4043 if (amdgpu_sriov_vf(adev)) { 4044 amdgpu_virt_release_full_gpu(adev, true); 4045 flush_delayed_work(&adev->delayed_init_work); 4046 } 4047 4048 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 4049 if (r) 4050 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 4051 4052 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 4053 r = amdgpu_pmu_init(adev); 4054 if (r) 4055 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 4056 4057 /* Have stored pci confspace at hand for restore in sudden PCI error */ 4058 if (amdgpu_device_cache_pci_state(adev->pdev)) 4059 pci_restore_state(pdev); 4060 4061 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 4062 /* this will fail for cards that aren't VGA class devices, just 4063 * ignore it */ 4064 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 4065 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); 4066 4067 px = amdgpu_device_supports_px(ddev); 4068 4069 if (px || (!pci_is_thunderbolt_attached(adev->pdev) && 4070 apple_gmux_detect(NULL, NULL))) 4071 vga_switcheroo_register_client(adev->pdev, 4072 &amdgpu_switcheroo_ops, px); 4073 4074 if (px) 4075 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 4076 4077 if (adev->gmc.xgmi.pending_reset) 4078 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, 4079 msecs_to_jiffies(AMDGPU_RESUME_MS)); 4080 4081 amdgpu_device_check_iommu_direct_map(adev); 4082 4083 return 0; 4084 4085 release_ras_con: 4086 if (amdgpu_sriov_vf(adev)) 4087 amdgpu_virt_release_full_gpu(adev, true); 4088 4089 /* failed in exclusive mode due to timeout */ 4090 if (amdgpu_sriov_vf(adev) && 4091 !amdgpu_sriov_runtime(adev) && 4092 amdgpu_virt_mmio_blocked(adev) && 4093 !amdgpu_virt_wait_reset(adev)) { 4094 dev_err(adev->dev, "VF exclusive mode timeout\n"); 4095 /* Don't send request since VF is inactive. */ 4096 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 4097 adev->virt.ops = NULL; 4098 r = -EAGAIN; 4099 } 4100 amdgpu_release_ras_context(adev); 4101 4102 failed: 4103 amdgpu_vf_error_trans_all(adev); 4104 4105 return r; 4106 } 4107 4108 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) 4109 { 4110 4111 /* Clear all CPU mappings pointing to this device */ 4112 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); 4113 4114 /* Unmap all mapped bars - Doorbell, registers and VRAM */ 4115 amdgpu_device_doorbell_fini(adev); 4116 4117 iounmap(adev->rmmio); 4118 adev->rmmio = NULL; 4119 if (adev->mman.aper_base_kaddr) 4120 iounmap(adev->mman.aper_base_kaddr); 4121 adev->mman.aper_base_kaddr = NULL; 4122 4123 /* Memory manager related */ 4124 if (!adev->gmc.xgmi.connected_to_cpu) { 4125 arch_phys_wc_del(adev->gmc.vram_mtrr); 4126 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); 4127 } 4128 } 4129 4130 /** 4131 * amdgpu_device_fini_hw - tear down the driver 4132 * 4133 * @adev: amdgpu_device pointer 4134 * 4135 * Tear down the driver info (all asics). 4136 * Called at driver shutdown. 4137 */ 4138 void amdgpu_device_fini_hw(struct amdgpu_device *adev) 4139 { 4140 dev_info(adev->dev, "amdgpu: finishing device.\n"); 4141 flush_delayed_work(&adev->delayed_init_work); 4142 adev->shutdown = true; 4143 4144 /* make sure IB test finished before entering exclusive mode 4145 * to avoid preemption on IB test 4146 * */ 4147 if (amdgpu_sriov_vf(adev)) { 4148 amdgpu_virt_request_full_gpu(adev, false); 4149 amdgpu_virt_fini_data_exchange(adev); 4150 } 4151 4152 /* disable all interrupts */ 4153 amdgpu_irq_disable_all(adev); 4154 if (adev->mode_info.mode_config_initialized) { 4155 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) 4156 drm_helper_force_disable_all(adev_to_drm(adev)); 4157 else 4158 drm_atomic_helper_shutdown(adev_to_drm(adev)); 4159 } 4160 amdgpu_fence_driver_hw_fini(adev); 4161 4162 if (adev->mman.initialized) 4163 drain_workqueue(adev->mman.bdev.wq); 4164 4165 if (adev->pm.sysfs_initialized) 4166 amdgpu_pm_sysfs_fini(adev); 4167 if (adev->ucode_sysfs_en) 4168 amdgpu_ucode_sysfs_fini(adev); 4169 if (adev->psp_sysfs_en) 4170 amdgpu_psp_sysfs_fini(adev); 4171 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 4172 4173 /* disable ras feature must before hw fini */ 4174 amdgpu_ras_pre_fini(adev); 4175 4176 amdgpu_device_ip_fini_early(adev); 4177 4178 amdgpu_irq_fini_hw(adev); 4179 4180 if (adev->mman.initialized) 4181 ttm_device_clear_dma_mappings(&adev->mman.bdev); 4182 4183 amdgpu_gart_dummy_page_fini(adev); 4184 4185 if (drm_dev_is_unplugged(adev_to_drm(adev))) 4186 amdgpu_device_unmap_mmio(adev); 4187 4188 } 4189 4190 void amdgpu_device_fini_sw(struct amdgpu_device *adev) 4191 { 4192 int idx; 4193 bool px; 4194 4195 amdgpu_fence_driver_sw_fini(adev); 4196 amdgpu_device_ip_fini(adev); 4197 amdgpu_ucode_release(&adev->firmware.gpu_info_fw); 4198 adev->accel_working = false; 4199 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); 4200 4201 amdgpu_reset_fini(adev); 4202 4203 /* free i2c buses */ 4204 if (!amdgpu_device_has_dc_support(adev)) 4205 amdgpu_i2c_fini(adev); 4206 4207 if (amdgpu_emu_mode != 1) 4208 amdgpu_atombios_fini(adev); 4209 4210 kfree(adev->bios); 4211 adev->bios = NULL; 4212 4213 px = amdgpu_device_supports_px(adev_to_drm(adev)); 4214 4215 if (px || (!pci_is_thunderbolt_attached(adev->pdev) && 4216 apple_gmux_detect(NULL, NULL))) 4217 vga_switcheroo_unregister_client(adev->pdev); 4218 4219 if (px) 4220 vga_switcheroo_fini_domain_pm_ops(adev->dev); 4221 4222 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 4223 vga_client_unregister(adev->pdev); 4224 4225 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 4226 4227 iounmap(adev->rmmio); 4228 adev->rmmio = NULL; 4229 amdgpu_device_doorbell_fini(adev); 4230 drm_dev_exit(idx); 4231 } 4232 4233 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 4234 amdgpu_pmu_fini(adev); 4235 if (adev->mman.discovery_bin) 4236 amdgpu_discovery_fini(adev); 4237 4238 amdgpu_reset_put_reset_domain(adev->reset_domain); 4239 adev->reset_domain = NULL; 4240 4241 kfree(adev->pci_state); 4242 4243 } 4244 4245 /** 4246 * amdgpu_device_evict_resources - evict device resources 4247 * @adev: amdgpu device object 4248 * 4249 * Evicts all ttm device resources(vram BOs, gart table) from the lru list 4250 * of the vram memory type. Mainly used for evicting device resources 4251 * at suspend time. 4252 * 4253 */ 4254 static int amdgpu_device_evict_resources(struct amdgpu_device *adev) 4255 { 4256 int ret; 4257 4258 /* No need to evict vram on APUs for suspend to ram or s2idle */ 4259 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) 4260 return 0; 4261 4262 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); 4263 if (ret) 4264 DRM_WARN("evicting device resources failed\n"); 4265 return ret; 4266 } 4267 4268 /* 4269 * Suspend & resume. 4270 */ 4271 /** 4272 * amdgpu_device_suspend - initiate device suspend 4273 * 4274 * @dev: drm dev pointer 4275 * @fbcon : notify the fbdev of suspend 4276 * 4277 * Puts the hw in the suspend state (all asics). 4278 * Returns 0 for success or an error on failure. 4279 * Called at driver suspend. 4280 */ 4281 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 4282 { 4283 struct amdgpu_device *adev = drm_to_adev(dev); 4284 int r = 0; 4285 4286 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4287 return 0; 4288 4289 adev->in_suspend = true; 4290 4291 /* Evict the majority of BOs before grabbing the full access */ 4292 r = amdgpu_device_evict_resources(adev); 4293 if (r) 4294 return r; 4295 4296 if (amdgpu_sriov_vf(adev)) { 4297 amdgpu_virt_fini_data_exchange(adev); 4298 r = amdgpu_virt_request_full_gpu(adev, false); 4299 if (r) 4300 return r; 4301 } 4302 4303 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) 4304 DRM_WARN("smart shift update failed\n"); 4305 4306 if (fbcon) 4307 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); 4308 4309 cancel_delayed_work_sync(&adev->delayed_init_work); 4310 4311 amdgpu_ras_suspend(adev); 4312 4313 amdgpu_device_ip_suspend_phase1(adev); 4314 4315 if (!adev->in_s0ix) 4316 amdgpu_amdkfd_suspend(adev, adev->in_runpm); 4317 4318 r = amdgpu_device_evict_resources(adev); 4319 if (r) 4320 return r; 4321 4322 amdgpu_fence_driver_hw_fini(adev); 4323 4324 amdgpu_device_ip_suspend_phase2(adev); 4325 4326 if (amdgpu_sriov_vf(adev)) 4327 amdgpu_virt_release_full_gpu(adev, false); 4328 4329 return 0; 4330 } 4331 4332 /** 4333 * amdgpu_device_resume - initiate device resume 4334 * 4335 * @dev: drm dev pointer 4336 * @fbcon : notify the fbdev of resume 4337 * 4338 * Bring the hw back to operating state (all asics). 4339 * Returns 0 for success or an error on failure. 4340 * Called at driver resume. 4341 */ 4342 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 4343 { 4344 struct amdgpu_device *adev = drm_to_adev(dev); 4345 int r = 0; 4346 4347 if (amdgpu_sriov_vf(adev)) { 4348 r = amdgpu_virt_request_full_gpu(adev, true); 4349 if (r) 4350 return r; 4351 } 4352 4353 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4354 return 0; 4355 4356 if (adev->in_s0ix) 4357 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry); 4358 4359 /* post card */ 4360 if (amdgpu_device_need_post(adev)) { 4361 r = amdgpu_device_asic_init(adev); 4362 if (r) 4363 dev_err(adev->dev, "amdgpu asic init failed\n"); 4364 } 4365 4366 r = amdgpu_device_ip_resume(adev); 4367 4368 if (r) { 4369 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); 4370 goto exit; 4371 } 4372 amdgpu_fence_driver_hw_init(adev); 4373 4374 r = amdgpu_device_ip_late_init(adev); 4375 if (r) 4376 goto exit; 4377 4378 queue_delayed_work(system_wq, &adev->delayed_init_work, 4379 msecs_to_jiffies(AMDGPU_RESUME_MS)); 4380 4381 if (!adev->in_s0ix) { 4382 r = amdgpu_amdkfd_resume(adev, adev->in_runpm); 4383 if (r) 4384 goto exit; 4385 } 4386 4387 exit: 4388 if (amdgpu_sriov_vf(adev)) { 4389 amdgpu_virt_init_data_exchange(adev); 4390 amdgpu_virt_release_full_gpu(adev, true); 4391 } 4392 4393 if (r) 4394 return r; 4395 4396 /* Make sure IB tests flushed */ 4397 flush_delayed_work(&adev->delayed_init_work); 4398 4399 if (fbcon) 4400 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); 4401 4402 amdgpu_ras_resume(adev); 4403 4404 if (adev->mode_info.num_crtc) { 4405 /* 4406 * Most of the connector probing functions try to acquire runtime pm 4407 * refs to ensure that the GPU is powered on when connector polling is 4408 * performed. Since we're calling this from a runtime PM callback, 4409 * trying to acquire rpm refs will cause us to deadlock. 4410 * 4411 * Since we're guaranteed to be holding the rpm lock, it's safe to 4412 * temporarily disable the rpm helpers so this doesn't deadlock us. 4413 */ 4414 #ifdef CONFIG_PM 4415 dev->dev->power.disable_depth++; 4416 #endif 4417 if (!adev->dc_enabled) 4418 drm_helper_hpd_irq_event(dev); 4419 else 4420 drm_kms_helper_hotplug_event(dev); 4421 #ifdef CONFIG_PM 4422 dev->dev->power.disable_depth--; 4423 #endif 4424 } 4425 adev->in_suspend = false; 4426 4427 if (adev->enable_mes) 4428 amdgpu_mes_self_test(adev); 4429 4430 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) 4431 DRM_WARN("smart shift update failed\n"); 4432 4433 return 0; 4434 } 4435 4436 /** 4437 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 4438 * 4439 * @adev: amdgpu_device pointer 4440 * 4441 * The list of all the hardware IPs that make up the asic is walked and 4442 * the check_soft_reset callbacks are run. check_soft_reset determines 4443 * if the asic is still hung or not. 4444 * Returns true if any of the IPs are still in a hung state, false if not. 4445 */ 4446 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 4447 { 4448 int i; 4449 bool asic_hang = false; 4450 4451 if (amdgpu_sriov_vf(adev)) 4452 return true; 4453 4454 if (amdgpu_asic_need_full_reset(adev)) 4455 return true; 4456 4457 for (i = 0; i < adev->num_ip_blocks; i++) { 4458 if (!adev->ip_blocks[i].status.valid) 4459 continue; 4460 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 4461 adev->ip_blocks[i].status.hang = 4462 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 4463 if (adev->ip_blocks[i].status.hang) { 4464 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 4465 asic_hang = true; 4466 } 4467 } 4468 return asic_hang; 4469 } 4470 4471 /** 4472 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 4473 * 4474 * @adev: amdgpu_device pointer 4475 * 4476 * The list of all the hardware IPs that make up the asic is walked and the 4477 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 4478 * handles any IP specific hardware or software state changes that are 4479 * necessary for a soft reset to succeed. 4480 * Returns 0 on success, negative error code on failure. 4481 */ 4482 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 4483 { 4484 int i, r = 0; 4485 4486 for (i = 0; i < adev->num_ip_blocks; i++) { 4487 if (!adev->ip_blocks[i].status.valid) 4488 continue; 4489 if (adev->ip_blocks[i].status.hang && 4490 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 4491 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 4492 if (r) 4493 return r; 4494 } 4495 } 4496 4497 return 0; 4498 } 4499 4500 /** 4501 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 4502 * 4503 * @adev: amdgpu_device pointer 4504 * 4505 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 4506 * reset is necessary to recover. 4507 * Returns true if a full asic reset is required, false if not. 4508 */ 4509 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 4510 { 4511 int i; 4512 4513 if (amdgpu_asic_need_full_reset(adev)) 4514 return true; 4515 4516 for (i = 0; i < adev->num_ip_blocks; i++) { 4517 if (!adev->ip_blocks[i].status.valid) 4518 continue; 4519 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 4520 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 4521 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 4522 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 4523 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 4524 if (adev->ip_blocks[i].status.hang) { 4525 dev_info(adev->dev, "Some block need full reset!\n"); 4526 return true; 4527 } 4528 } 4529 } 4530 return false; 4531 } 4532 4533 /** 4534 * amdgpu_device_ip_soft_reset - do a soft reset 4535 * 4536 * @adev: amdgpu_device pointer 4537 * 4538 * The list of all the hardware IPs that make up the asic is walked and the 4539 * soft_reset callbacks are run if the block is hung. soft_reset handles any 4540 * IP specific hardware or software state changes that are necessary to soft 4541 * reset the IP. 4542 * Returns 0 on success, negative error code on failure. 4543 */ 4544 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 4545 { 4546 int i, r = 0; 4547 4548 for (i = 0; i < adev->num_ip_blocks; i++) { 4549 if (!adev->ip_blocks[i].status.valid) 4550 continue; 4551 if (adev->ip_blocks[i].status.hang && 4552 adev->ip_blocks[i].version->funcs->soft_reset) { 4553 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 4554 if (r) 4555 return r; 4556 } 4557 } 4558 4559 return 0; 4560 } 4561 4562 /** 4563 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 4564 * 4565 * @adev: amdgpu_device pointer 4566 * 4567 * The list of all the hardware IPs that make up the asic is walked and the 4568 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 4569 * handles any IP specific hardware or software state changes that are 4570 * necessary after the IP has been soft reset. 4571 * Returns 0 on success, negative error code on failure. 4572 */ 4573 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 4574 { 4575 int i, r = 0; 4576 4577 for (i = 0; i < adev->num_ip_blocks; i++) { 4578 if (!adev->ip_blocks[i].status.valid) 4579 continue; 4580 if (adev->ip_blocks[i].status.hang && 4581 adev->ip_blocks[i].version->funcs->post_soft_reset) 4582 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 4583 if (r) 4584 return r; 4585 } 4586 4587 return 0; 4588 } 4589 4590 /** 4591 * amdgpu_device_recover_vram - Recover some VRAM contents 4592 * 4593 * @adev: amdgpu_device pointer 4594 * 4595 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 4596 * restore things like GPUVM page tables after a GPU reset where 4597 * the contents of VRAM might be lost. 4598 * 4599 * Returns: 4600 * 0 on success, negative error code on failure. 4601 */ 4602 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 4603 { 4604 struct dma_fence *fence = NULL, *next = NULL; 4605 struct amdgpu_bo *shadow; 4606 struct amdgpu_bo_vm *vmbo; 4607 long r = 1, tmo; 4608 4609 if (amdgpu_sriov_runtime(adev)) 4610 tmo = msecs_to_jiffies(8000); 4611 else 4612 tmo = msecs_to_jiffies(100); 4613 4614 dev_info(adev->dev, "recover vram bo from shadow start\n"); 4615 mutex_lock(&adev->shadow_list_lock); 4616 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { 4617 /* If vm is compute context or adev is APU, shadow will be NULL */ 4618 if (!vmbo->shadow) 4619 continue; 4620 shadow = vmbo->shadow; 4621 4622 /* No need to recover an evicted BO */ 4623 if (shadow->tbo.resource->mem_type != TTM_PL_TT || 4624 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || 4625 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) 4626 continue; 4627 4628 r = amdgpu_bo_restore_shadow(shadow, &next); 4629 if (r) 4630 break; 4631 4632 if (fence) { 4633 tmo = dma_fence_wait_timeout(fence, false, tmo); 4634 dma_fence_put(fence); 4635 fence = next; 4636 if (tmo == 0) { 4637 r = -ETIMEDOUT; 4638 break; 4639 } else if (tmo < 0) { 4640 r = tmo; 4641 break; 4642 } 4643 } else { 4644 fence = next; 4645 } 4646 } 4647 mutex_unlock(&adev->shadow_list_lock); 4648 4649 if (fence) 4650 tmo = dma_fence_wait_timeout(fence, false, tmo); 4651 dma_fence_put(fence); 4652 4653 if (r < 0 || tmo <= 0) { 4654 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 4655 return -EIO; 4656 } 4657 4658 dev_info(adev->dev, "recover vram bo from shadow done\n"); 4659 return 0; 4660 } 4661 4662 4663 /** 4664 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 4665 * 4666 * @adev: amdgpu_device pointer 4667 * @from_hypervisor: request from hypervisor 4668 * 4669 * do VF FLR and reinitialize Asic 4670 * return 0 means succeeded otherwise failed 4671 */ 4672 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 4673 bool from_hypervisor) 4674 { 4675 int r; 4676 struct amdgpu_hive_info *hive = NULL; 4677 int retry_limit = 0; 4678 4679 retry: 4680 amdgpu_amdkfd_pre_reset(adev); 4681 4682 if (from_hypervisor) 4683 r = amdgpu_virt_request_full_gpu(adev, true); 4684 else 4685 r = amdgpu_virt_reset_gpu(adev); 4686 if (r) 4687 return r; 4688 4689 /* Resume IP prior to SMC */ 4690 r = amdgpu_device_ip_reinit_early_sriov(adev); 4691 if (r) 4692 goto error; 4693 4694 amdgpu_virt_init_data_exchange(adev); 4695 4696 r = amdgpu_device_fw_loading(adev); 4697 if (r) 4698 return r; 4699 4700 /* now we are okay to resume SMC/CP/SDMA */ 4701 r = amdgpu_device_ip_reinit_late_sriov(adev); 4702 if (r) 4703 goto error; 4704 4705 hive = amdgpu_get_xgmi_hive(adev); 4706 /* Update PSP FW topology after reset */ 4707 if (hive && adev->gmc.xgmi.num_physical_nodes > 1) 4708 r = amdgpu_xgmi_update_topology(hive, adev); 4709 4710 if (hive) 4711 amdgpu_put_xgmi_hive(hive); 4712 4713 if (!r) { 4714 amdgpu_irq_gpu_reset_resume_helper(adev); 4715 r = amdgpu_ib_ring_tests(adev); 4716 4717 amdgpu_amdkfd_post_reset(adev); 4718 } 4719 4720 error: 4721 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 4722 amdgpu_inc_vram_lost(adev); 4723 r = amdgpu_device_recover_vram(adev); 4724 } 4725 amdgpu_virt_release_full_gpu(adev, true); 4726 4727 if (AMDGPU_RETRY_SRIOV_RESET(r)) { 4728 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { 4729 retry_limit++; 4730 goto retry; 4731 } else 4732 DRM_ERROR("GPU reset retry is beyond the retry limit\n"); 4733 } 4734 4735 return r; 4736 } 4737 4738 /** 4739 * amdgpu_device_has_job_running - check if there is any job in mirror list 4740 * 4741 * @adev: amdgpu_device pointer 4742 * 4743 * check if there is any job in mirror list 4744 */ 4745 bool amdgpu_device_has_job_running(struct amdgpu_device *adev) 4746 { 4747 int i; 4748 struct drm_sched_job *job; 4749 4750 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4751 struct amdgpu_ring *ring = adev->rings[i]; 4752 4753 if (!ring || !ring->sched.thread) 4754 continue; 4755 4756 spin_lock(&ring->sched.job_list_lock); 4757 job = list_first_entry_or_null(&ring->sched.pending_list, 4758 struct drm_sched_job, list); 4759 spin_unlock(&ring->sched.job_list_lock); 4760 if (job) 4761 return true; 4762 } 4763 return false; 4764 } 4765 4766 /** 4767 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 4768 * 4769 * @adev: amdgpu_device pointer 4770 * 4771 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 4772 * a hung GPU. 4773 */ 4774 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 4775 { 4776 4777 if (amdgpu_gpu_recovery == 0) 4778 goto disabled; 4779 4780 /* Skip soft reset check in fatal error mode */ 4781 if (!amdgpu_ras_is_poison_mode_supported(adev)) 4782 return true; 4783 4784 if (amdgpu_sriov_vf(adev)) 4785 return true; 4786 4787 if (amdgpu_gpu_recovery == -1) { 4788 switch (adev->asic_type) { 4789 #ifdef CONFIG_DRM_AMDGPU_SI 4790 case CHIP_VERDE: 4791 case CHIP_TAHITI: 4792 case CHIP_PITCAIRN: 4793 case CHIP_OLAND: 4794 case CHIP_HAINAN: 4795 #endif 4796 #ifdef CONFIG_DRM_AMDGPU_CIK 4797 case CHIP_KAVERI: 4798 case CHIP_KABINI: 4799 case CHIP_MULLINS: 4800 #endif 4801 case CHIP_CARRIZO: 4802 case CHIP_STONEY: 4803 case CHIP_CYAN_SKILLFISH: 4804 goto disabled; 4805 default: 4806 break; 4807 } 4808 } 4809 4810 return true; 4811 4812 disabled: 4813 dev_info(adev->dev, "GPU recovery disabled.\n"); 4814 return false; 4815 } 4816 4817 int amdgpu_device_mode1_reset(struct amdgpu_device *adev) 4818 { 4819 u32 i; 4820 int ret = 0; 4821 4822 amdgpu_atombios_scratch_regs_engine_hung(adev, true); 4823 4824 dev_info(adev->dev, "GPU mode1 reset\n"); 4825 4826 /* disable BM */ 4827 pci_clear_master(adev->pdev); 4828 4829 amdgpu_device_cache_pci_state(adev->pdev); 4830 4831 if (amdgpu_dpm_is_mode1_reset_supported(adev)) { 4832 dev_info(adev->dev, "GPU smu mode1 reset\n"); 4833 ret = amdgpu_dpm_mode1_reset(adev); 4834 } else { 4835 dev_info(adev->dev, "GPU psp mode1 reset\n"); 4836 ret = psp_gpu_reset(adev); 4837 } 4838 4839 if (ret) 4840 dev_err(adev->dev, "GPU mode1 reset failed\n"); 4841 4842 amdgpu_device_load_pci_state(adev->pdev); 4843 4844 /* wait for asic to come out of reset */ 4845 for (i = 0; i < adev->usec_timeout; i++) { 4846 u32 memsize = adev->nbio.funcs->get_memsize(adev); 4847 4848 if (memsize != 0xffffffff) 4849 break; 4850 udelay(1); 4851 } 4852 4853 amdgpu_atombios_scratch_regs_engine_hung(adev, false); 4854 return ret; 4855 } 4856 4857 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 4858 struct amdgpu_reset_context *reset_context) 4859 { 4860 int i, r = 0; 4861 struct amdgpu_job *job = NULL; 4862 bool need_full_reset = 4863 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4864 4865 if (reset_context->reset_req_dev == adev) 4866 job = reset_context->job; 4867 4868 if (amdgpu_sriov_vf(adev)) { 4869 /* stop the data exchange thread */ 4870 amdgpu_virt_fini_data_exchange(adev); 4871 } 4872 4873 amdgpu_fence_driver_isr_toggle(adev, true); 4874 4875 /* block all schedulers and reset given job's ring */ 4876 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4877 struct amdgpu_ring *ring = adev->rings[i]; 4878 4879 if (!ring || !ring->sched.thread) 4880 continue; 4881 4882 /*clear job fence from fence drv to avoid force_completion 4883 *leave NULL and vm flush fence in fence drv */ 4884 amdgpu_fence_driver_clear_job_fences(ring); 4885 4886 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 4887 amdgpu_fence_driver_force_completion(ring); 4888 } 4889 4890 amdgpu_fence_driver_isr_toggle(adev, false); 4891 4892 if (job && job->vm) 4893 drm_sched_increase_karma(&job->base); 4894 4895 r = amdgpu_reset_prepare_hwcontext(adev, reset_context); 4896 /* If reset handler not implemented, continue; otherwise return */ 4897 if (r == -ENOSYS) 4898 r = 0; 4899 else 4900 return r; 4901 4902 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 4903 if (!amdgpu_sriov_vf(adev)) { 4904 4905 if (!need_full_reset) 4906 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 4907 4908 if (!need_full_reset && amdgpu_gpu_recovery && 4909 amdgpu_device_ip_check_soft_reset(adev)) { 4910 amdgpu_device_ip_pre_soft_reset(adev); 4911 r = amdgpu_device_ip_soft_reset(adev); 4912 amdgpu_device_ip_post_soft_reset(adev); 4913 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 4914 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n"); 4915 need_full_reset = true; 4916 } 4917 } 4918 4919 if (need_full_reset) 4920 r = amdgpu_device_ip_suspend(adev); 4921 if (need_full_reset) 4922 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4923 else 4924 clear_bit(AMDGPU_NEED_FULL_RESET, 4925 &reset_context->flags); 4926 } 4927 4928 return r; 4929 } 4930 4931 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) 4932 { 4933 int i; 4934 4935 lockdep_assert_held(&adev->reset_domain->sem); 4936 4937 for (i = 0; i < adev->num_regs; i++) { 4938 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); 4939 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], 4940 adev->reset_dump_reg_value[i]); 4941 } 4942 4943 return 0; 4944 } 4945 4946 #ifdef CONFIG_DEV_COREDUMP 4947 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, 4948 size_t count, void *data, size_t datalen) 4949 { 4950 struct drm_printer p; 4951 struct amdgpu_device *adev = data; 4952 struct drm_print_iterator iter; 4953 int i; 4954 4955 iter.data = buffer; 4956 iter.offset = 0; 4957 iter.start = offset; 4958 iter.remain = count; 4959 4960 p = drm_coredump_printer(&iter); 4961 4962 drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); 4963 drm_printf(&p, "kernel: " UTS_RELEASE "\n"); 4964 drm_printf(&p, "module: " KBUILD_MODNAME "\n"); 4965 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); 4966 if (adev->reset_task_info.pid) 4967 drm_printf(&p, "process_name: %s PID: %d\n", 4968 adev->reset_task_info.process_name, 4969 adev->reset_task_info.pid); 4970 4971 if (adev->reset_vram_lost) 4972 drm_printf(&p, "VRAM is lost due to GPU reset!\n"); 4973 if (adev->num_regs) { 4974 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); 4975 4976 for (i = 0; i < adev->num_regs; i++) 4977 drm_printf(&p, "0x%08x: 0x%08x\n", 4978 adev->reset_dump_reg_list[i], 4979 adev->reset_dump_reg_value[i]); 4980 } 4981 4982 return count - iter.remain; 4983 } 4984 4985 static void amdgpu_devcoredump_free(void *data) 4986 { 4987 } 4988 4989 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) 4990 { 4991 struct drm_device *dev = adev_to_drm(adev); 4992 4993 ktime_get_ts64(&adev->reset_time); 4994 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL, 4995 amdgpu_devcoredump_read, amdgpu_devcoredump_free); 4996 } 4997 #endif 4998 4999 int amdgpu_do_asic_reset(struct list_head *device_list_handle, 5000 struct amdgpu_reset_context *reset_context) 5001 { 5002 struct amdgpu_device *tmp_adev = NULL; 5003 bool need_full_reset, skip_hw_reset, vram_lost = false; 5004 int r = 0; 5005 bool gpu_reset_for_dev_remove = 0; 5006 5007 /* Try reset handler method first */ 5008 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5009 reset_list); 5010 amdgpu_reset_reg_dumps(tmp_adev); 5011 5012 reset_context->reset_device_list = device_list_handle; 5013 r = amdgpu_reset_perform_reset(tmp_adev, reset_context); 5014 /* If reset handler not implemented, continue; otherwise return */ 5015 if (r == -ENOSYS) 5016 r = 0; 5017 else 5018 return r; 5019 5020 /* Reset handler not implemented, use the default method */ 5021 need_full_reset = 5022 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5023 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); 5024 5025 gpu_reset_for_dev_remove = 5026 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5027 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5028 5029 /* 5030 * ASIC reset has to be done on all XGMI hive nodes ASAP 5031 * to allow proper links negotiation in FW (within 1 sec) 5032 */ 5033 if (!skip_hw_reset && need_full_reset) { 5034 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5035 /* For XGMI run all resets in parallel to speed up the process */ 5036 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 5037 tmp_adev->gmc.xgmi.pending_reset = false; 5038 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 5039 r = -EALREADY; 5040 } else 5041 r = amdgpu_asic_reset(tmp_adev); 5042 5043 if (r) { 5044 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", 5045 r, adev_to_drm(tmp_adev)->unique); 5046 break; 5047 } 5048 } 5049 5050 /* For XGMI wait for all resets to complete before proceed */ 5051 if (!r) { 5052 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5053 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 5054 flush_work(&tmp_adev->xgmi_reset_work); 5055 r = tmp_adev->asic_reset_res; 5056 if (r) 5057 break; 5058 } 5059 } 5060 } 5061 } 5062 5063 if (!r && amdgpu_ras_intr_triggered()) { 5064 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5065 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && 5066 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 5067 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); 5068 } 5069 5070 amdgpu_ras_intr_cleared(); 5071 } 5072 5073 /* Since the mode1 reset affects base ip blocks, the 5074 * phase1 ip blocks need to be resumed. Otherwise there 5075 * will be a BIOS signature error and the psp bootloader 5076 * can't load kdb on the next amdgpu install. 5077 */ 5078 if (gpu_reset_for_dev_remove) { 5079 list_for_each_entry(tmp_adev, device_list_handle, reset_list) 5080 amdgpu_device_ip_resume_phase1(tmp_adev); 5081 5082 goto end; 5083 } 5084 5085 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5086 if (need_full_reset) { 5087 /* post card */ 5088 r = amdgpu_device_asic_init(tmp_adev); 5089 if (r) { 5090 dev_warn(tmp_adev->dev, "asic atom init failed!"); 5091 } else { 5092 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 5093 r = amdgpu_amdkfd_resume_iommu(tmp_adev); 5094 if (r) 5095 goto out; 5096 5097 r = amdgpu_device_ip_resume_phase1(tmp_adev); 5098 if (r) 5099 goto out; 5100 5101 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 5102 #ifdef CONFIG_DEV_COREDUMP 5103 tmp_adev->reset_vram_lost = vram_lost; 5104 memset(&tmp_adev->reset_task_info, 0, 5105 sizeof(tmp_adev->reset_task_info)); 5106 if (reset_context->job && reset_context->job->vm) 5107 tmp_adev->reset_task_info = 5108 reset_context->job->vm->task_info; 5109 amdgpu_reset_capture_coredumpm(tmp_adev); 5110 #endif 5111 if (vram_lost) { 5112 DRM_INFO("VRAM is lost due to GPU reset!\n"); 5113 amdgpu_inc_vram_lost(tmp_adev); 5114 } 5115 5116 r = amdgpu_device_fw_loading(tmp_adev); 5117 if (r) 5118 return r; 5119 5120 r = amdgpu_device_ip_resume_phase2(tmp_adev); 5121 if (r) 5122 goto out; 5123 5124 if (vram_lost) 5125 amdgpu_device_fill_reset_magic(tmp_adev); 5126 5127 /* 5128 * Add this ASIC as tracked as reset was already 5129 * complete successfully. 5130 */ 5131 amdgpu_register_gpu_instance(tmp_adev); 5132 5133 if (!reset_context->hive && 5134 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 5135 amdgpu_xgmi_add_device(tmp_adev); 5136 5137 r = amdgpu_device_ip_late_init(tmp_adev); 5138 if (r) 5139 goto out; 5140 5141 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); 5142 5143 /* 5144 * The GPU enters bad state once faulty pages 5145 * by ECC has reached the threshold, and ras 5146 * recovery is scheduled next. So add one check 5147 * here to break recovery if it indeed exceeds 5148 * bad page threshold, and remind user to 5149 * retire this GPU or setting one bigger 5150 * bad_page_threshold value to fix this once 5151 * probing driver again. 5152 */ 5153 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { 5154 /* must succeed. */ 5155 amdgpu_ras_resume(tmp_adev); 5156 } else { 5157 r = -EINVAL; 5158 goto out; 5159 } 5160 5161 /* Update PSP FW topology after reset */ 5162 if (reset_context->hive && 5163 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 5164 r = amdgpu_xgmi_update_topology( 5165 reset_context->hive, tmp_adev); 5166 } 5167 } 5168 5169 out: 5170 if (!r) { 5171 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 5172 r = amdgpu_ib_ring_tests(tmp_adev); 5173 if (r) { 5174 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 5175 need_full_reset = true; 5176 r = -EAGAIN; 5177 goto end; 5178 } 5179 } 5180 5181 if (!r) 5182 r = amdgpu_device_recover_vram(tmp_adev); 5183 else 5184 tmp_adev->asic_reset_res = r; 5185 } 5186 5187 end: 5188 if (need_full_reset) 5189 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5190 else 5191 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5192 return r; 5193 } 5194 5195 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) 5196 { 5197 5198 switch (amdgpu_asic_reset_method(adev)) { 5199 case AMD_RESET_METHOD_MODE1: 5200 adev->mp1_state = PP_MP1_STATE_SHUTDOWN; 5201 break; 5202 case AMD_RESET_METHOD_MODE2: 5203 adev->mp1_state = PP_MP1_STATE_RESET; 5204 break; 5205 default: 5206 adev->mp1_state = PP_MP1_STATE_NONE; 5207 break; 5208 } 5209 } 5210 5211 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) 5212 { 5213 amdgpu_vf_error_trans_all(adev); 5214 adev->mp1_state = PP_MP1_STATE_NONE; 5215 } 5216 5217 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) 5218 { 5219 struct pci_dev *p = NULL; 5220 5221 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5222 adev->pdev->bus->number, 1); 5223 if (p) { 5224 pm_runtime_enable(&(p->dev)); 5225 pm_runtime_resume(&(p->dev)); 5226 } 5227 5228 pci_dev_put(p); 5229 } 5230 5231 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 5232 { 5233 enum amd_reset_method reset_method; 5234 struct pci_dev *p = NULL; 5235 u64 expires; 5236 5237 /* 5238 * For now, only BACO and mode1 reset are confirmed 5239 * to suffer the audio issue without proper suspended. 5240 */ 5241 reset_method = amdgpu_asic_reset_method(adev); 5242 if ((reset_method != AMD_RESET_METHOD_BACO) && 5243 (reset_method != AMD_RESET_METHOD_MODE1)) 5244 return -EINVAL; 5245 5246 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5247 adev->pdev->bus->number, 1); 5248 if (!p) 5249 return -ENODEV; 5250 5251 expires = pm_runtime_autosuspend_expiration(&(p->dev)); 5252 if (!expires) 5253 /* 5254 * If we cannot get the audio device autosuspend delay, 5255 * a fixed 4S interval will be used. Considering 3S is 5256 * the audio controller default autosuspend delay setting. 5257 * 4S used here is guaranteed to cover that. 5258 */ 5259 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 5260 5261 while (!pm_runtime_status_suspended(&(p->dev))) { 5262 if (!pm_runtime_suspend(&(p->dev))) 5263 break; 5264 5265 if (expires < ktime_get_mono_fast_ns()) { 5266 dev_warn(adev->dev, "failed to suspend display audio\n"); 5267 pci_dev_put(p); 5268 /* TODO: abort the succeeding gpu reset? */ 5269 return -ETIMEDOUT; 5270 } 5271 } 5272 5273 pm_runtime_disable(&(p->dev)); 5274 5275 pci_dev_put(p); 5276 return 0; 5277 } 5278 5279 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) 5280 { 5281 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 5282 5283 #if defined(CONFIG_DEBUG_FS) 5284 if (!amdgpu_sriov_vf(adev)) 5285 cancel_work(&adev->reset_work); 5286 #endif 5287 5288 if (adev->kfd.dev) 5289 cancel_work(&adev->kfd.reset_work); 5290 5291 if (amdgpu_sriov_vf(adev)) 5292 cancel_work(&adev->virt.flr_work); 5293 5294 if (con && adev->ras_enabled) 5295 cancel_work(&con->recovery_work); 5296 5297 } 5298 5299 /** 5300 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 5301 * 5302 * @adev: amdgpu_device pointer 5303 * @job: which job trigger hang 5304 * @reset_context: amdgpu reset context pointer 5305 * 5306 * Attempt to reset the GPU if it has hung (all asics). 5307 * Attempt to do soft-reset or full-reset and reinitialize Asic 5308 * Returns 0 for success or an error on failure. 5309 */ 5310 5311 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 5312 struct amdgpu_job *job, 5313 struct amdgpu_reset_context *reset_context) 5314 { 5315 struct list_head device_list, *device_list_handle = NULL; 5316 bool job_signaled = false; 5317 struct amdgpu_hive_info *hive = NULL; 5318 struct amdgpu_device *tmp_adev = NULL; 5319 int i, r = 0; 5320 bool need_emergency_restart = false; 5321 bool audio_suspended = false; 5322 bool gpu_reset_for_dev_remove = false; 5323 5324 gpu_reset_for_dev_remove = 5325 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5326 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5327 5328 /* 5329 * Special case: RAS triggered and full reset isn't supported 5330 */ 5331 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); 5332 5333 /* 5334 * Flush RAM to disk so that after reboot 5335 * the user can read log and see why the system rebooted. 5336 */ 5337 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { 5338 DRM_WARN("Emergency reboot."); 5339 5340 ksys_sync_helper(); 5341 emergency_restart(); 5342 } 5343 5344 dev_info(adev->dev, "GPU %s begin!\n", 5345 need_emergency_restart ? "jobs stop":"reset"); 5346 5347 if (!amdgpu_sriov_vf(adev)) 5348 hive = amdgpu_get_xgmi_hive(adev); 5349 if (hive) 5350 mutex_lock(&hive->hive_lock); 5351 5352 reset_context->job = job; 5353 reset_context->hive = hive; 5354 /* 5355 * Build list of devices to reset. 5356 * In case we are in XGMI hive mode, resort the device list 5357 * to put adev in the 1st position. 5358 */ 5359 INIT_LIST_HEAD(&device_list); 5360 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { 5361 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 5362 list_add_tail(&tmp_adev->reset_list, &device_list); 5363 if (gpu_reset_for_dev_remove && adev->shutdown) 5364 tmp_adev->shutdown = true; 5365 } 5366 if (!list_is_first(&adev->reset_list, &device_list)) 5367 list_rotate_to_front(&adev->reset_list, &device_list); 5368 device_list_handle = &device_list; 5369 } else { 5370 list_add_tail(&adev->reset_list, &device_list); 5371 device_list_handle = &device_list; 5372 } 5373 5374 /* We need to lock reset domain only once both for XGMI and single device */ 5375 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5376 reset_list); 5377 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); 5378 5379 /* block all schedulers and reset given job's ring */ 5380 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5381 5382 amdgpu_device_set_mp1_state(tmp_adev); 5383 5384 /* 5385 * Try to put the audio codec into suspend state 5386 * before gpu reset started. 5387 * 5388 * Due to the power domain of the graphics device 5389 * is shared with AZ power domain. Without this, 5390 * we may change the audio hardware from behind 5391 * the audio driver's back. That will trigger 5392 * some audio codec errors. 5393 */ 5394 if (!amdgpu_device_suspend_display_audio(tmp_adev)) 5395 audio_suspended = true; 5396 5397 amdgpu_ras_set_error_query_ready(tmp_adev, false); 5398 5399 cancel_delayed_work_sync(&tmp_adev->delayed_init_work); 5400 5401 if (!amdgpu_sriov_vf(tmp_adev)) 5402 amdgpu_amdkfd_pre_reset(tmp_adev); 5403 5404 /* 5405 * Mark these ASICs to be reseted as untracked first 5406 * And add them back after reset completed 5407 */ 5408 amdgpu_unregister_gpu_instance(tmp_adev); 5409 5410 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); 5411 5412 /* disable ras on ALL IPs */ 5413 if (!need_emergency_restart && 5414 amdgpu_device_ip_need_full_reset(tmp_adev)) 5415 amdgpu_ras_suspend(tmp_adev); 5416 5417 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5418 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5419 5420 if (!ring || !ring->sched.thread) 5421 continue; 5422 5423 drm_sched_stop(&ring->sched, job ? &job->base : NULL); 5424 5425 if (need_emergency_restart) 5426 amdgpu_job_stop_all_jobs_on_sched(&ring->sched); 5427 } 5428 atomic_inc(&tmp_adev->gpu_reset_counter); 5429 } 5430 5431 if (need_emergency_restart) 5432 goto skip_sched_resume; 5433 5434 /* 5435 * Must check guilty signal here since after this point all old 5436 * HW fences are force signaled. 5437 * 5438 * job->base holds a reference to parent fence 5439 */ 5440 if (job && dma_fence_is_signaled(&job->hw_fence)) { 5441 job_signaled = true; 5442 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 5443 goto skip_hw_reset; 5444 } 5445 5446 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 5447 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5448 if (gpu_reset_for_dev_remove) { 5449 /* Workaroud for ASICs need to disable SMC first */ 5450 amdgpu_device_smu_fini_early(tmp_adev); 5451 } 5452 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); 5453 /*TODO Should we stop ?*/ 5454 if (r) { 5455 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", 5456 r, adev_to_drm(tmp_adev)->unique); 5457 tmp_adev->asic_reset_res = r; 5458 } 5459 5460 /* 5461 * Drop all pending non scheduler resets. Scheduler resets 5462 * were already dropped during drm_sched_stop 5463 */ 5464 amdgpu_device_stop_pending_resets(tmp_adev); 5465 } 5466 5467 /* Actual ASIC resets if needed.*/ 5468 /* Host driver will handle XGMI hive reset for SRIOV */ 5469 if (amdgpu_sriov_vf(adev)) { 5470 r = amdgpu_device_reset_sriov(adev, job ? false : true); 5471 if (r) 5472 adev->asic_reset_res = r; 5473 5474 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ 5475 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || 5476 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) 5477 amdgpu_ras_resume(adev); 5478 } else { 5479 r = amdgpu_do_asic_reset(device_list_handle, reset_context); 5480 if (r && r == -EAGAIN) 5481 goto retry; 5482 5483 if (!r && gpu_reset_for_dev_remove) 5484 goto recover_end; 5485 } 5486 5487 skip_hw_reset: 5488 5489 /* Post ASIC reset for all devs .*/ 5490 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5491 5492 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5493 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5494 5495 if (!ring || !ring->sched.thread) 5496 continue; 5497 5498 drm_sched_start(&ring->sched, true); 5499 } 5500 5501 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) 5502 amdgpu_mes_self_test(tmp_adev); 5503 5504 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { 5505 drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); 5506 } 5507 5508 if (tmp_adev->asic_reset_res) 5509 r = tmp_adev->asic_reset_res; 5510 5511 tmp_adev->asic_reset_res = 0; 5512 5513 if (r) { 5514 /* bad news, how to tell it to userspace ? */ 5515 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5516 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 5517 } else { 5518 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5519 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) 5520 DRM_WARN("smart shift update failed\n"); 5521 } 5522 } 5523 5524 skip_sched_resume: 5525 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5526 /* unlock kfd: SRIOV would do it separately */ 5527 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) 5528 amdgpu_amdkfd_post_reset(tmp_adev); 5529 5530 /* kfd_post_reset will do nothing if kfd device is not initialized, 5531 * need to bring up kfd here if it's not be initialized before 5532 */ 5533 if (!adev->kfd.init_complete) 5534 amdgpu_amdkfd_device_init(adev); 5535 5536 if (audio_suspended) 5537 amdgpu_device_resume_display_audio(tmp_adev); 5538 5539 amdgpu_device_unset_mp1_state(tmp_adev); 5540 5541 amdgpu_ras_set_error_query_ready(tmp_adev, true); 5542 } 5543 5544 recover_end: 5545 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5546 reset_list); 5547 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); 5548 5549 if (hive) { 5550 mutex_unlock(&hive->hive_lock); 5551 amdgpu_put_xgmi_hive(hive); 5552 } 5553 5554 if (r) 5555 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 5556 5557 atomic_set(&adev->reset_domain->reset_res, r); 5558 return r; 5559 } 5560 5561 /** 5562 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 5563 * 5564 * @adev: amdgpu_device pointer 5565 * 5566 * Fetchs and stores in the driver the PCIE capabilities (gen speed 5567 * and lanes) of the slot the device is in. Handles APUs and 5568 * virtualized environments where PCIE config space may not be available. 5569 */ 5570 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 5571 { 5572 struct pci_dev *pdev; 5573 enum pci_bus_speed speed_cap, platform_speed_cap; 5574 enum pcie_link_width platform_link_width; 5575 5576 if (amdgpu_pcie_gen_cap) 5577 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 5578 5579 if (amdgpu_pcie_lane_cap) 5580 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 5581 5582 /* covers APUs as well */ 5583 if (pci_is_root_bus(adev->pdev->bus)) { 5584 if (adev->pm.pcie_gen_mask == 0) 5585 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 5586 if (adev->pm.pcie_mlw_mask == 0) 5587 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 5588 return; 5589 } 5590 5591 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 5592 return; 5593 5594 pcie_bandwidth_available(adev->pdev, NULL, 5595 &platform_speed_cap, &platform_link_width); 5596 5597 if (adev->pm.pcie_gen_mask == 0) { 5598 /* asic caps */ 5599 pdev = adev->pdev; 5600 speed_cap = pcie_get_speed_cap(pdev); 5601 if (speed_cap == PCI_SPEED_UNKNOWN) { 5602 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5603 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5604 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5605 } else { 5606 if (speed_cap == PCIE_SPEED_32_0GT) 5607 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5608 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5609 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5610 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 | 5611 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5); 5612 else if (speed_cap == PCIE_SPEED_16_0GT) 5613 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5614 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5615 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5616 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 5617 else if (speed_cap == PCIE_SPEED_8_0GT) 5618 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5619 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5620 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5621 else if (speed_cap == PCIE_SPEED_5_0GT) 5622 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5623 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 5624 else 5625 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 5626 } 5627 /* platform caps */ 5628 if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 5629 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5630 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5631 } else { 5632 if (platform_speed_cap == PCIE_SPEED_32_0GT) 5633 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5634 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5635 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5636 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 | 5637 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5); 5638 else if (platform_speed_cap == PCIE_SPEED_16_0GT) 5639 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5640 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5641 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5642 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 5643 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 5644 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5645 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5646 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 5647 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 5648 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5649 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5650 else 5651 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 5652 5653 } 5654 } 5655 if (adev->pm.pcie_mlw_mask == 0) { 5656 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 5657 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 5658 } else { 5659 switch (platform_link_width) { 5660 case PCIE_LNK_X32: 5661 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 5662 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5663 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5664 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5665 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5666 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5667 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5668 break; 5669 case PCIE_LNK_X16: 5670 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5671 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5672 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5673 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5674 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5675 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5676 break; 5677 case PCIE_LNK_X12: 5678 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5679 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5680 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5681 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5682 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5683 break; 5684 case PCIE_LNK_X8: 5685 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5686 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5687 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5688 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5689 break; 5690 case PCIE_LNK_X4: 5691 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5692 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5693 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5694 break; 5695 case PCIE_LNK_X2: 5696 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5697 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5698 break; 5699 case PCIE_LNK_X1: 5700 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 5701 break; 5702 default: 5703 break; 5704 } 5705 } 5706 } 5707 } 5708 5709 /** 5710 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR 5711 * 5712 * @adev: amdgpu_device pointer 5713 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev 5714 * 5715 * Return true if @peer_adev can access (DMA) @adev through the PCIe 5716 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of 5717 * @peer_adev. 5718 */ 5719 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, 5720 struct amdgpu_device *peer_adev) 5721 { 5722 #ifdef CONFIG_HSA_AMD_P2P 5723 uint64_t address_mask = peer_adev->dev->dma_mask ? 5724 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); 5725 resource_size_t aper_limit = 5726 adev->gmc.aper_base + adev->gmc.aper_size - 1; 5727 bool p2p_access = 5728 !adev->gmc.xgmi.connected_to_cpu && 5729 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); 5730 5731 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && 5732 adev->gmc.real_vram_size == adev->gmc.visible_vram_size && 5733 !(adev->gmc.aper_base & address_mask || 5734 aper_limit & address_mask)); 5735 #else 5736 return false; 5737 #endif 5738 } 5739 5740 int amdgpu_device_baco_enter(struct drm_device *dev) 5741 { 5742 struct amdgpu_device *adev = drm_to_adev(dev); 5743 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5744 5745 if (!amdgpu_device_supports_baco(dev)) 5746 return -ENOTSUPP; 5747 5748 if (ras && adev->ras_enabled && 5749 adev->nbio.funcs->enable_doorbell_interrupt) 5750 adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 5751 5752 return amdgpu_dpm_baco_enter(adev); 5753 } 5754 5755 int amdgpu_device_baco_exit(struct drm_device *dev) 5756 { 5757 struct amdgpu_device *adev = drm_to_adev(dev); 5758 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5759 int ret = 0; 5760 5761 if (!amdgpu_device_supports_baco(dev)) 5762 return -ENOTSUPP; 5763 5764 ret = amdgpu_dpm_baco_exit(adev); 5765 if (ret) 5766 return ret; 5767 5768 if (ras && adev->ras_enabled && 5769 adev->nbio.funcs->enable_doorbell_interrupt) 5770 adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 5771 5772 if (amdgpu_passthrough(adev) && 5773 adev->nbio.funcs->clear_doorbell_interrupt) 5774 adev->nbio.funcs->clear_doorbell_interrupt(adev); 5775 5776 return 0; 5777 } 5778 5779 /** 5780 * amdgpu_pci_error_detected - Called when a PCI error is detected. 5781 * @pdev: PCI device struct 5782 * @state: PCI channel state 5783 * 5784 * Description: Called when a PCI error is detected. 5785 * 5786 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT. 5787 */ 5788 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) 5789 { 5790 struct drm_device *dev = pci_get_drvdata(pdev); 5791 struct amdgpu_device *adev = drm_to_adev(dev); 5792 int i; 5793 5794 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); 5795 5796 if (adev->gmc.xgmi.num_physical_nodes > 1) { 5797 DRM_WARN("No support for XGMI hive yet..."); 5798 return PCI_ERS_RESULT_DISCONNECT; 5799 } 5800 5801 adev->pci_channel_state = state; 5802 5803 switch (state) { 5804 case pci_channel_io_normal: 5805 return PCI_ERS_RESULT_CAN_RECOVER; 5806 /* Fatal error, prepare for slot reset */ 5807 case pci_channel_io_frozen: 5808 /* 5809 * Locking adev->reset_domain->sem will prevent any external access 5810 * to GPU during PCI error recovery 5811 */ 5812 amdgpu_device_lock_reset_domain(adev->reset_domain); 5813 amdgpu_device_set_mp1_state(adev); 5814 5815 /* 5816 * Block any work scheduling as we do for regular GPU reset 5817 * for the duration of the recovery 5818 */ 5819 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5820 struct amdgpu_ring *ring = adev->rings[i]; 5821 5822 if (!ring || !ring->sched.thread) 5823 continue; 5824 5825 drm_sched_stop(&ring->sched, NULL); 5826 } 5827 atomic_inc(&adev->gpu_reset_counter); 5828 return PCI_ERS_RESULT_NEED_RESET; 5829 case pci_channel_io_perm_failure: 5830 /* Permanent error, prepare for device removal */ 5831 return PCI_ERS_RESULT_DISCONNECT; 5832 } 5833 5834 return PCI_ERS_RESULT_NEED_RESET; 5835 } 5836 5837 /** 5838 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers 5839 * @pdev: pointer to PCI device 5840 */ 5841 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) 5842 { 5843 5844 DRM_INFO("PCI error: mmio enabled callback!!\n"); 5845 5846 /* TODO - dump whatever for debugging purposes */ 5847 5848 /* This called only if amdgpu_pci_error_detected returns 5849 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still 5850 * works, no need to reset slot. 5851 */ 5852 5853 return PCI_ERS_RESULT_RECOVERED; 5854 } 5855 5856 /** 5857 * amdgpu_pci_slot_reset - Called when PCI slot has been reset. 5858 * @pdev: PCI device struct 5859 * 5860 * Description: This routine is called by the pci error recovery 5861 * code after the PCI slot has been reset, just before we 5862 * should resume normal operations. 5863 */ 5864 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) 5865 { 5866 struct drm_device *dev = pci_get_drvdata(pdev); 5867 struct amdgpu_device *adev = drm_to_adev(dev); 5868 int r, i; 5869 struct amdgpu_reset_context reset_context; 5870 u32 memsize; 5871 struct list_head device_list; 5872 5873 DRM_INFO("PCI error: slot reset callback!!\n"); 5874 5875 memset(&reset_context, 0, sizeof(reset_context)); 5876 5877 INIT_LIST_HEAD(&device_list); 5878 list_add_tail(&adev->reset_list, &device_list); 5879 5880 /* wait for asic to come out of reset */ 5881 msleep(500); 5882 5883 /* Restore PCI confspace */ 5884 amdgpu_device_load_pci_state(pdev); 5885 5886 /* confirm ASIC came out of reset */ 5887 for (i = 0; i < adev->usec_timeout; i++) { 5888 memsize = amdgpu_asic_get_config_memsize(adev); 5889 5890 if (memsize != 0xffffffff) 5891 break; 5892 udelay(1); 5893 } 5894 if (memsize == 0xffffffff) { 5895 r = -ETIME; 5896 goto out; 5897 } 5898 5899 reset_context.method = AMD_RESET_METHOD_NONE; 5900 reset_context.reset_req_dev = adev; 5901 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 5902 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); 5903 5904 adev->no_hw_access = true; 5905 r = amdgpu_device_pre_asic_reset(adev, &reset_context); 5906 adev->no_hw_access = false; 5907 if (r) 5908 goto out; 5909 5910 r = amdgpu_do_asic_reset(&device_list, &reset_context); 5911 5912 out: 5913 if (!r) { 5914 if (amdgpu_device_cache_pci_state(adev->pdev)) 5915 pci_restore_state(adev->pdev); 5916 5917 DRM_INFO("PCIe error recovery succeeded\n"); 5918 } else { 5919 DRM_ERROR("PCIe error recovery failed, err:%d", r); 5920 amdgpu_device_unset_mp1_state(adev); 5921 amdgpu_device_unlock_reset_domain(adev->reset_domain); 5922 } 5923 5924 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 5925 } 5926 5927 /** 5928 * amdgpu_pci_resume() - resume normal ops after PCI reset 5929 * @pdev: pointer to PCI device 5930 * 5931 * Called when the error recovery driver tells us that its 5932 * OK to resume normal operation. 5933 */ 5934 void amdgpu_pci_resume(struct pci_dev *pdev) 5935 { 5936 struct drm_device *dev = pci_get_drvdata(pdev); 5937 struct amdgpu_device *adev = drm_to_adev(dev); 5938 int i; 5939 5940 5941 DRM_INFO("PCI error: resume callback!!\n"); 5942 5943 /* Only continue execution for the case of pci_channel_io_frozen */ 5944 if (adev->pci_channel_state != pci_channel_io_frozen) 5945 return; 5946 5947 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5948 struct amdgpu_ring *ring = adev->rings[i]; 5949 5950 if (!ring || !ring->sched.thread) 5951 continue; 5952 5953 drm_sched_start(&ring->sched, true); 5954 } 5955 5956 amdgpu_device_unset_mp1_state(adev); 5957 amdgpu_device_unlock_reset_domain(adev->reset_domain); 5958 } 5959 5960 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) 5961 { 5962 struct drm_device *dev = pci_get_drvdata(pdev); 5963 struct amdgpu_device *adev = drm_to_adev(dev); 5964 int r; 5965 5966 r = pci_save_state(pdev); 5967 if (!r) { 5968 kfree(adev->pci_state); 5969 5970 adev->pci_state = pci_store_saved_state(pdev); 5971 5972 if (!adev->pci_state) { 5973 DRM_ERROR("Failed to store PCI saved state"); 5974 return false; 5975 } 5976 } else { 5977 DRM_WARN("Failed to save PCI state, err:%d\n", r); 5978 return false; 5979 } 5980 5981 return true; 5982 } 5983 5984 bool amdgpu_device_load_pci_state(struct pci_dev *pdev) 5985 { 5986 struct drm_device *dev = pci_get_drvdata(pdev); 5987 struct amdgpu_device *adev = drm_to_adev(dev); 5988 int r; 5989 5990 if (!adev->pci_state) 5991 return false; 5992 5993 r = pci_load_saved_state(pdev, adev->pci_state); 5994 5995 if (!r) { 5996 pci_restore_state(pdev); 5997 } else { 5998 DRM_WARN("Failed to load PCI state, err:%d\n", r); 5999 return false; 6000 } 6001 6002 return true; 6003 } 6004 6005 void amdgpu_device_flush_hdp(struct amdgpu_device *adev, 6006 struct amdgpu_ring *ring) 6007 { 6008 #ifdef CONFIG_X86_64 6009 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) 6010 return; 6011 #endif 6012 if (adev->gmc.xgmi.connected_to_cpu) 6013 return; 6014 6015 if (ring && ring->funcs->emit_hdp_flush) 6016 amdgpu_ring_emit_hdp_flush(ring); 6017 else 6018 amdgpu_asic_flush_hdp(adev, ring); 6019 } 6020 6021 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, 6022 struct amdgpu_ring *ring) 6023 { 6024 #ifdef CONFIG_X86_64 6025 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) 6026 return; 6027 #endif 6028 if (adev->gmc.xgmi.connected_to_cpu) 6029 return; 6030 6031 amdgpu_asic_invalidate_hdp(adev, ring); 6032 } 6033 6034 int amdgpu_in_reset(struct amdgpu_device *adev) 6035 { 6036 return atomic_read(&adev->reset_domain->in_gpu_reset); 6037 } 6038 6039 /** 6040 * amdgpu_device_halt() - bring hardware to some kind of halt state 6041 * 6042 * @adev: amdgpu_device pointer 6043 * 6044 * Bring hardware to some kind of halt state so that no one can touch it 6045 * any more. It will help to maintain error context when error occurred. 6046 * Compare to a simple hang, the system will keep stable at least for SSH 6047 * access. Then it should be trivial to inspect the hardware state and 6048 * see what's going on. Implemented as following: 6049 * 6050 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc), 6051 * clears all CPU mappings to device, disallows remappings through page faults 6052 * 2. amdgpu_irq_disable_all() disables all interrupts 6053 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences 6054 * 4. set adev->no_hw_access to avoid potential crashes after setp 5 6055 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings 6056 * 6. pci_disable_device() and pci_wait_for_pending_transaction() 6057 * flush any in flight DMA operations 6058 */ 6059 void amdgpu_device_halt(struct amdgpu_device *adev) 6060 { 6061 struct pci_dev *pdev = adev->pdev; 6062 struct drm_device *ddev = adev_to_drm(adev); 6063 6064 drm_dev_unplug(ddev); 6065 6066 amdgpu_irq_disable_all(adev); 6067 6068 amdgpu_fence_driver_hw_fini(adev); 6069 6070 adev->no_hw_access = true; 6071 6072 amdgpu_device_unmap_mmio(adev); 6073 6074 pci_disable_device(pdev); 6075 pci_wait_for_pending_transaction(pdev); 6076 } 6077 6078 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, 6079 u32 reg) 6080 { 6081 unsigned long flags, address, data; 6082 u32 r; 6083 6084 address = adev->nbio.funcs->get_pcie_port_index_offset(adev); 6085 data = adev->nbio.funcs->get_pcie_port_data_offset(adev); 6086 6087 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 6088 WREG32(address, reg * 4); 6089 (void)RREG32(address); 6090 r = RREG32(data); 6091 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 6092 return r; 6093 } 6094 6095 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, 6096 u32 reg, u32 v) 6097 { 6098 unsigned long flags, address, data; 6099 6100 address = adev->nbio.funcs->get_pcie_port_index_offset(adev); 6101 data = adev->nbio.funcs->get_pcie_port_data_offset(adev); 6102 6103 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 6104 WREG32(address, reg * 4); 6105 (void)RREG32(address); 6106 WREG32(data, v); 6107 (void)RREG32(data); 6108 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 6109 } 6110 6111 /** 6112 * amdgpu_device_switch_gang - switch to a new gang 6113 * @adev: amdgpu_device pointer 6114 * @gang: the gang to switch to 6115 * 6116 * Try to switch to a new gang. 6117 * Returns: NULL if we switched to the new gang or a reference to the current 6118 * gang leader. 6119 */ 6120 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, 6121 struct dma_fence *gang) 6122 { 6123 struct dma_fence *old = NULL; 6124 6125 do { 6126 dma_fence_put(old); 6127 rcu_read_lock(); 6128 old = dma_fence_get_rcu_safe(&adev->gang_submit); 6129 rcu_read_unlock(); 6130 6131 if (old == gang) 6132 break; 6133 6134 if (!dma_fence_is_signaled(old)) 6135 return old; 6136 6137 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, 6138 old, gang) != old); 6139 6140 dma_fence_put(old); 6141 return NULL; 6142 } 6143 6144 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) 6145 { 6146 switch (adev->asic_type) { 6147 #ifdef CONFIG_DRM_AMDGPU_SI 6148 case CHIP_HAINAN: 6149 #endif 6150 case CHIP_TOPAZ: 6151 /* chips with no display hardware */ 6152 return false; 6153 #ifdef CONFIG_DRM_AMDGPU_SI 6154 case CHIP_TAHITI: 6155 case CHIP_PITCAIRN: 6156 case CHIP_VERDE: 6157 case CHIP_OLAND: 6158 #endif 6159 #ifdef CONFIG_DRM_AMDGPU_CIK 6160 case CHIP_BONAIRE: 6161 case CHIP_HAWAII: 6162 case CHIP_KAVERI: 6163 case CHIP_KABINI: 6164 case CHIP_MULLINS: 6165 #endif 6166 case CHIP_TONGA: 6167 case CHIP_FIJI: 6168 case CHIP_POLARIS10: 6169 case CHIP_POLARIS11: 6170 case CHIP_POLARIS12: 6171 case CHIP_VEGAM: 6172 case CHIP_CARRIZO: 6173 case CHIP_STONEY: 6174 /* chips with display hardware */ 6175 return true; 6176 default: 6177 /* IP discovery */ 6178 if (!adev->ip_versions[DCE_HWIP][0] || 6179 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) 6180 return false; 6181 return true; 6182 } 6183 } 6184 6185 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, 6186 uint32_t inst, uint32_t reg_addr, char reg_name[], 6187 uint32_t expected_value, uint32_t mask) 6188 { 6189 uint32_t ret = 0; 6190 uint32_t old_ = 0; 6191 uint32_t tmp_ = RREG32(reg_addr); 6192 uint32_t loop = adev->usec_timeout; 6193 6194 while ((tmp_ & (mask)) != (expected_value)) { 6195 if (old_ != tmp_) { 6196 loop = adev->usec_timeout; 6197 old_ = tmp_; 6198 } else 6199 udelay(1); 6200 tmp_ = RREG32(reg_addr); 6201 loop--; 6202 if (!loop) { 6203 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", 6204 inst, reg_name, (uint32_t)expected_value, 6205 (uint32_t)(tmp_ & (mask))); 6206 ret = -ETIMEDOUT; 6207 break; 6208 } 6209 } 6210 return ret; 6211 } 6212