1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/power_supply.h> 29 #include <linux/kthread.h> 30 #include <linux/module.h> 31 #include <linux/console.h> 32 #include <linux/slab.h> 33 34 #include <drm/drm_atomic_helper.h> 35 #include <drm/drm_probe_helper.h> 36 #include <drm/amdgpu_drm.h> 37 #include <linux/vgaarb.h> 38 #include <linux/vga_switcheroo.h> 39 #include <linux/efi.h> 40 #include "amdgpu.h" 41 #include "amdgpu_trace.h" 42 #include "amdgpu_i2c.h" 43 #include "atom.h" 44 #include "amdgpu_atombios.h" 45 #include "amdgpu_atomfirmware.h" 46 #include "amd_pcie.h" 47 #ifdef CONFIG_DRM_AMDGPU_SI 48 #include "si.h" 49 #endif 50 #ifdef CONFIG_DRM_AMDGPU_CIK 51 #include "cik.h" 52 #endif 53 #include "vi.h" 54 #include "soc15.h" 55 #include "nv.h" 56 #include "bif/bif_4_1_d.h" 57 #include <linux/pci.h> 58 #include <linux/firmware.h> 59 #include "amdgpu_vf_error.h" 60 61 #include "amdgpu_amdkfd.h" 62 #include "amdgpu_pm.h" 63 64 #include "amdgpu_xgmi.h" 65 #include "amdgpu_ras.h" 66 #include "amdgpu_pmu.h" 67 #include "amdgpu_fru_eeprom.h" 68 69 #include <linux/suspend.h> 70 #include <drm/task_barrier.h> 71 #include <linux/pm_runtime.h> 72 73 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 74 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); 75 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 76 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); 77 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); 78 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); 79 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); 80 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); 81 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); 82 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); 83 84 #define AMDGPU_RESUME_MS 2000 85 86 const char *amdgpu_asic_name[] = { 87 "TAHITI", 88 "PITCAIRN", 89 "VERDE", 90 "OLAND", 91 "HAINAN", 92 "BONAIRE", 93 "KAVERI", 94 "KABINI", 95 "HAWAII", 96 "MULLINS", 97 "TOPAZ", 98 "TONGA", 99 "FIJI", 100 "CARRIZO", 101 "STONEY", 102 "POLARIS10", 103 "POLARIS11", 104 "POLARIS12", 105 "VEGAM", 106 "VEGA10", 107 "VEGA12", 108 "VEGA20", 109 "RAVEN", 110 "ARCTURUS", 111 "RENOIR", 112 "NAVI10", 113 "NAVI14", 114 "NAVI12", 115 "LAST", 116 }; 117 118 /** 119 * DOC: pcie_replay_count 120 * 121 * The amdgpu driver provides a sysfs API for reporting the total number 122 * of PCIe 
replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
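 *
 * Example (illustrative sketch only, not code from this file): a caller could
 * combine the two helpers above to pick a runtime power-off strategy:
 *
 *   bool use_baco = !amdgpu_device_supports_boco(ddev) &&
 *                   amdgpu_device_supports_baco(ddev);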
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read helper for byte-wide access.
 * @offset: byte offset from the start of the MMIO space
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return readb(adev->rmmio + offset);
	BUG();
}

/*
 * MMIO register write helper for byte-wide access.
 * @offset: byte offset from the start of the MMIO space
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
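 *
 * Example (illustrative sketch; the offset is a placeholder, not a real
 * register): the byte helpers pair with amdgpu_mm_rreg8() for sub-dword
 * read-modify-write accesses, e.g.
 *
 *   uint8_t tmp = amdgpu_mm_rreg8(adev, offset);
 *   amdgpu_mm_wreg8(adev, offset, tmp | 0x1);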
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
					     uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else
		adev->pcie_wreg(adev, (reg * 4), v);
}

/**
 * amdgpu_device_wreg - write to a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
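 *
 * Example (illustrative sketch; the ring variable is hypothetical, not taken
 * from this file): doorbell slots are indexed in dwords, so a ring could
 * publish its write pointer with the matching write helper, e.g.
 *
 *   amdgpu_mm_wdoorbell(adev, ring->doorbell_index, lower_32_bits(ring->wptr));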
465 */ 466 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) 467 { 468 if (index < adev->doorbell.num_doorbells) { 469 return readl(adev->doorbell.ptr + index); 470 } else { 471 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 472 return 0; 473 } 474 } 475 476 /** 477 * amdgpu_mm_wdoorbell - write a doorbell dword 478 * 479 * @adev: amdgpu_device pointer 480 * @index: doorbell index 481 * @v: value to write 482 * 483 * Writes @v to the doorbell aperture at the 484 * requested doorbell index (CIK). 485 */ 486 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) 487 { 488 if (index < adev->doorbell.num_doorbells) { 489 writel(v, adev->doorbell.ptr + index); 490 } else { 491 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 492 } 493 } 494 495 /** 496 * amdgpu_mm_rdoorbell64 - read a doorbell Qword 497 * 498 * @adev: amdgpu_device pointer 499 * @index: doorbell index 500 * 501 * Returns the value in the doorbell aperture at the 502 * requested doorbell index (VEGA10+). 503 */ 504 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) 505 { 506 if (index < adev->doorbell.num_doorbells) { 507 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); 508 } else { 509 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 510 return 0; 511 } 512 } 513 514 /** 515 * amdgpu_mm_wdoorbell64 - write a doorbell Qword 516 * 517 * @adev: amdgpu_device pointer 518 * @index: doorbell index 519 * @v: value to write 520 * 521 * Writes @v to the doorbell aperture at the 522 * requested doorbell index (VEGA10+). 523 */ 524 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) 525 { 526 if (index < adev->doorbell.num_doorbells) { 527 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); 528 } else { 529 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 530 } 531 } 532 533 /** 534 * amdgpu_invalid_rreg - dummy reg read function 535 * 536 * @adev: amdgpu device pointer 537 * @reg: offset of register 538 * 539 * Dummy register read function. Used for register blocks 540 * that certain asics don't have (all asics). 541 * Returns the value in the register. 542 */ 543 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) 544 { 545 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); 546 BUG(); 547 return 0; 548 } 549 550 /** 551 * amdgpu_invalid_wreg - dummy reg write function 552 * 553 * @adev: amdgpu device pointer 554 * @reg: offset of register 555 * @v: value to write to the register 556 * 557 * Dummy register read function. Used for register blocks 558 * that certain asics don't have (all asics). 559 */ 560 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 561 { 562 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", 563 reg, v); 564 BUG(); 565 } 566 567 /** 568 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function 569 * 570 * @adev: amdgpu device pointer 571 * @reg: offset of register 572 * 573 * Dummy register read function. Used for register blocks 574 * that certain asics don't have (all asics). 575 * Returns the value in the register. 
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
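 *
 * Example (illustrative values only, mmFOO/mmBAR are placeholders and the
 * masks are not real golden settings): @registers is a flat array of
 * {offset, and_mask, or_mask} triples, so programming two registers could
 * look like:
 *
 *   static const u32 golden_settings[] = {
 *           mmFOO, 0xffffffff, 0x00000001,
 *           mmBAR, 0x0000ff00, 0x00002400,
 *   };
 *   amdgpu_device_program_register_sequence(adev, golden_settings,
 *                                           ARRAY_SIZE(golden_settings));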
679 */ 680 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, 681 const u32 *registers, 682 const u32 array_size) 683 { 684 u32 tmp, reg, and_mask, or_mask; 685 int i; 686 687 if (array_size % 3) 688 return; 689 690 for (i = 0; i < array_size; i +=3) { 691 reg = registers[i + 0]; 692 and_mask = registers[i + 1]; 693 or_mask = registers[i + 2]; 694 695 if (and_mask == 0xffffffff) { 696 tmp = or_mask; 697 } else { 698 tmp = RREG32(reg); 699 tmp &= ~and_mask; 700 if (adev->family >= AMDGPU_FAMILY_AI) 701 tmp |= (or_mask & and_mask); 702 else 703 tmp |= or_mask; 704 } 705 WREG32(reg, tmp); 706 } 707 } 708 709 /** 710 * amdgpu_device_pci_config_reset - reset the GPU 711 * 712 * @adev: amdgpu_device pointer 713 * 714 * Resets the GPU using the pci config reset sequence. 715 * Only applicable to asics prior to vega10. 716 */ 717 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) 718 { 719 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); 720 } 721 722 /* 723 * GPU doorbell aperture helpers function. 724 */ 725 /** 726 * amdgpu_device_doorbell_init - Init doorbell driver information. 727 * 728 * @adev: amdgpu_device pointer 729 * 730 * Init doorbell driver information (CIK) 731 * Returns 0 on success, error on failure. 732 */ 733 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) 734 { 735 736 /* No doorbell on SI hardware generation */ 737 if (adev->asic_type < CHIP_BONAIRE) { 738 adev->doorbell.base = 0; 739 adev->doorbell.size = 0; 740 adev->doorbell.num_doorbells = 0; 741 adev->doorbell.ptr = NULL; 742 return 0; 743 } 744 745 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) 746 return -EINVAL; 747 748 amdgpu_asic_init_doorbell_index(adev); 749 750 /* doorbell bar mapping */ 751 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 752 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 753 754 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), 755 adev->doorbell_index.max_assignment+1); 756 if (adev->doorbell.num_doorbells == 0) 757 return -EINVAL; 758 759 /* For Vega, reserve and map two pages on doorbell BAR since SDMA 760 * paging queue doorbell use the second page. The 761 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the 762 * doorbells are in the first page. So with paging queue enabled, 763 * the max num_doorbells should + 1 page (0x400 in dword) 764 */ 765 if (adev->asic_type >= CHIP_VEGA10) 766 adev->doorbell.num_doorbells += 0x400; 767 768 adev->doorbell.ptr = ioremap(adev->doorbell.base, 769 adev->doorbell.num_doorbells * 770 sizeof(u32)); 771 if (adev->doorbell.ptr == NULL) 772 return -ENOMEM; 773 774 return 0; 775 } 776 777 /** 778 * amdgpu_device_doorbell_fini - Tear down doorbell driver information. 779 * 780 * @adev: amdgpu_device pointer 781 * 782 * Tear down doorbell driver information (CIK) 783 */ 784 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) 785 { 786 iounmap(adev->doorbell.ptr); 787 adev->doorbell.ptr = NULL; 788 } 789 790 791 792 /* 793 * amdgpu_device_wb_*() 794 * Writeback is the method by which the GPU updates special pages in memory 795 * with the status of certain GPU events (fences, ring pointers,etc.). 796 */ 797 798 /** 799 * amdgpu_device_wb_fini - Disable Writeback and free memory 800 * 801 * @adev: amdgpu_device pointer 802 * 803 * Disables Writeback and frees the Writeback memory (all asics). 804 * Used at driver shutdown. 
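 *
 * Example (illustrative sketch; the fence_offs variable is hypothetical):
 * the memory freed here backs the slots handed out by amdgpu_device_wb_get()
 * as dword offsets into adev->wb.wb, e.g.
 *
 *   u32 fence_offs;
 *
 *   if (!amdgpu_device_wb_get(adev, &fence_offs)) {
 *           volatile uint32_t *cpu_ptr = &adev->wb.wb[fence_offs];
 *           uint64_t gpu_addr = adev->wb.gpu_addr + (fence_offs * 4);
 *           amdgpu_device_wb_free(adev, fence_offs);
 *   }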
805 */ 806 static void amdgpu_device_wb_fini(struct amdgpu_device *adev) 807 { 808 if (adev->wb.wb_obj) { 809 amdgpu_bo_free_kernel(&adev->wb.wb_obj, 810 &adev->wb.gpu_addr, 811 (void **)&adev->wb.wb); 812 adev->wb.wb_obj = NULL; 813 } 814 } 815 816 /** 817 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory 818 * 819 * @adev: amdgpu_device pointer 820 * 821 * Initializes writeback and allocates writeback memory (all asics). 822 * Used at driver startup. 823 * Returns 0 on success or an -error on failure. 824 */ 825 static int amdgpu_device_wb_init(struct amdgpu_device *adev) 826 { 827 int r; 828 829 if (adev->wb.wb_obj == NULL) { 830 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ 831 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, 832 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 833 &adev->wb.wb_obj, &adev->wb.gpu_addr, 834 (void **)&adev->wb.wb); 835 if (r) { 836 dev_warn(adev->dev, "(%d) create WB bo failed\n", r); 837 return r; 838 } 839 840 adev->wb.num_wb = AMDGPU_MAX_WB; 841 memset(&adev->wb.used, 0, sizeof(adev->wb.used)); 842 843 /* clear wb memory */ 844 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); 845 } 846 847 return 0; 848 } 849 850 /** 851 * amdgpu_device_wb_get - Allocate a wb entry 852 * 853 * @adev: amdgpu_device pointer 854 * @wb: wb index 855 * 856 * Allocate a wb slot for use by the driver (all asics). 857 * Returns 0 on success or -EINVAL on failure. 858 */ 859 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) 860 { 861 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); 862 863 if (offset < adev->wb.num_wb) { 864 __set_bit(offset, adev->wb.used); 865 *wb = offset << 3; /* convert to dw offset */ 866 return 0; 867 } else { 868 return -EINVAL; 869 } 870 } 871 872 /** 873 * amdgpu_device_wb_free - Free a wb entry 874 * 875 * @adev: amdgpu_device pointer 876 * @wb: wb index 877 * 878 * Free a wb slot allocated for use by the driver (all asics) 879 */ 880 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) 881 { 882 wb >>= 3; 883 if (wb < adev->wb.num_wb) 884 __clear_bit(wb, adev->wb.used); 885 } 886 887 /** 888 * amdgpu_device_resize_fb_bar - try to resize FB BAR 889 * 890 * @adev: amdgpu_device pointer 891 * 892 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not 893 * to fail, but if any of the BARs is not accessible after the size we abort 894 * driver loading by returning -ENODEV. 
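 *
 * Worked example (for illustration only): with 8 GiB of VRAM, space_needed
 * rounds up to 0x2_0000_0000, (space_needed >> 20) | 1 is 8193, and
 * rbar_size becomes order_base_2(8193) - 1 = 13, which the PCI resizable
 * BAR code interprets as a 1 MiB << 13 = 8 GiB BAR.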
895 */ 896 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 897 { 898 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size); 899 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; 900 struct pci_bus *root; 901 struct resource *res; 902 unsigned i; 903 u16 cmd; 904 int r; 905 906 /* Bypass for VF */ 907 if (amdgpu_sriov_vf(adev)) 908 return 0; 909 910 /* Check if the root BUS has 64bit memory resources */ 911 root = adev->pdev->bus; 912 while (root->parent) 913 root = root->parent; 914 915 pci_bus_for_each_resource(root, res, i) { 916 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 917 res->start > 0x100000000ull) 918 break; 919 } 920 921 /* Trying to resize is pointless without a root hub window above 4GB */ 922 if (!res) 923 return 0; 924 925 /* Disable memory decoding while we change the BAR addresses and size */ 926 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); 927 pci_write_config_word(adev->pdev, PCI_COMMAND, 928 cmd & ~PCI_COMMAND_MEMORY); 929 930 /* Free the VRAM and doorbell BAR, we most likely need to move both. */ 931 amdgpu_device_doorbell_fini(adev); 932 if (adev->asic_type >= CHIP_BONAIRE) 933 pci_release_resource(adev->pdev, 2); 934 935 pci_release_resource(adev->pdev, 0); 936 937 r = pci_resize_resource(adev->pdev, 0, rbar_size); 938 if (r == -ENOSPC) 939 DRM_INFO("Not enough PCI address space for a large BAR."); 940 else if (r && r != -ENOTSUPP) 941 DRM_ERROR("Problem resizing BAR0 (%d).", r); 942 943 pci_assign_unassigned_bus_resources(adev->pdev->bus); 944 945 /* When the doorbell or fb BAR isn't available we have no chance of 946 * using the device. 947 */ 948 r = amdgpu_device_doorbell_init(adev); 949 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) 950 return -ENODEV; 951 952 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); 953 954 return 0; 955 } 956 957 /* 958 * GPU helpers function. 959 */ 960 /** 961 * amdgpu_device_need_post - check if the hw need post or not 962 * 963 * @adev: amdgpu_device pointer 964 * 965 * Check if the asic has been initialized (all asics) at driver startup 966 * or post is needed if hw reset is performed. 967 * Returns true if need or false if not. 
968 */ 969 bool amdgpu_device_need_post(struct amdgpu_device *adev) 970 { 971 uint32_t reg; 972 973 if (amdgpu_sriov_vf(adev)) 974 return false; 975 976 if (amdgpu_passthrough(adev)) { 977 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot 978 * some old smc fw still need driver do vPost otherwise gpu hang, while 979 * those smc fw version above 22.15 doesn't have this flaw, so we force 980 * vpost executed for smc version below 22.15 981 */ 982 if (adev->asic_type == CHIP_FIJI) { 983 int err; 984 uint32_t fw_ver; 985 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); 986 /* force vPost if error occured */ 987 if (err) 988 return true; 989 990 fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 991 if (fw_ver < 0x00160e00) 992 return true; 993 } 994 } 995 996 if (adev->has_hw_reset) { 997 adev->has_hw_reset = false; 998 return true; 999 } 1000 1001 /* bios scratch used on CIK+ */ 1002 if (adev->asic_type >= CHIP_BONAIRE) 1003 return amdgpu_atombios_scratch_need_asic_init(adev); 1004 1005 /* check MEM_SIZE for older asics */ 1006 reg = amdgpu_asic_get_config_memsize(adev); 1007 1008 if ((reg != 0) && (reg != 0xffffffff)) 1009 return false; 1010 1011 return true; 1012 } 1013 1014 /* if we get transitioned to only one device, take VGA back */ 1015 /** 1016 * amdgpu_device_vga_set_decode - enable/disable vga decode 1017 * 1018 * @cookie: amdgpu_device pointer 1019 * @state: enable/disable vga decode 1020 * 1021 * Enable/disable vga decode (all asics). 1022 * Returns VGA resource flags. 1023 */ 1024 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) 1025 { 1026 struct amdgpu_device *adev = cookie; 1027 amdgpu_asic_set_vga_state(adev, state); 1028 if (state) 1029 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | 1030 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1031 else 1032 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1033 } 1034 1035 /** 1036 * amdgpu_device_check_block_size - validate the vm block size 1037 * 1038 * @adev: amdgpu_device pointer 1039 * 1040 * Validates the vm block size specified via module parameter. 1041 * The vm block size defines number of bits in page table versus page directory, 1042 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1043 * page table and the remaining bits are in the page directory. 1044 */ 1045 static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 1046 { 1047 /* defines number of bits in page table versus page directory, 1048 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1049 * page table and the remaining bits are in the page directory */ 1050 if (amdgpu_vm_block_size == -1) 1051 return; 1052 1053 if (amdgpu_vm_block_size < 9) { 1054 dev_warn(adev->dev, "VM page table size (%d) too small\n", 1055 amdgpu_vm_block_size); 1056 amdgpu_vm_block_size = -1; 1057 } 1058 } 1059 1060 /** 1061 * amdgpu_device_check_vm_size - validate the vm size 1062 * 1063 * @adev: amdgpu_device pointer 1064 * 1065 * Validates the vm size in GB specified via module parameter. 1066 * The VM size is the size of the GPU virtual memory space in GB. 
1067 */ 1068 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 1069 { 1070 /* no need to check the default value */ 1071 if (amdgpu_vm_size == -1) 1072 return; 1073 1074 if (amdgpu_vm_size < 1) { 1075 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", 1076 amdgpu_vm_size); 1077 amdgpu_vm_size = -1; 1078 } 1079 } 1080 1081 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) 1082 { 1083 struct sysinfo si; 1084 bool is_os_64 = (sizeof(void *) == 8); 1085 uint64_t total_memory; 1086 uint64_t dram_size_seven_GB = 0x1B8000000; 1087 uint64_t dram_size_three_GB = 0xB8000000; 1088 1089 if (amdgpu_smu_memory_pool_size == 0) 1090 return; 1091 1092 if (!is_os_64) { 1093 DRM_WARN("Not 64-bit OS, feature not supported\n"); 1094 goto def_value; 1095 } 1096 si_meminfo(&si); 1097 total_memory = (uint64_t)si.totalram * si.mem_unit; 1098 1099 if ((amdgpu_smu_memory_pool_size == 1) || 1100 (amdgpu_smu_memory_pool_size == 2)) { 1101 if (total_memory < dram_size_three_GB) 1102 goto def_value1; 1103 } else if ((amdgpu_smu_memory_pool_size == 4) || 1104 (amdgpu_smu_memory_pool_size == 8)) { 1105 if (total_memory < dram_size_seven_GB) 1106 goto def_value1; 1107 } else { 1108 DRM_WARN("Smu memory pool size not supported\n"); 1109 goto def_value; 1110 } 1111 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; 1112 1113 return; 1114 1115 def_value1: 1116 DRM_WARN("No enough system memory\n"); 1117 def_value: 1118 adev->pm.smu_prv_buffer_size = 0; 1119 } 1120 1121 /** 1122 * amdgpu_device_check_arguments - validate module params 1123 * 1124 * @adev: amdgpu_device pointer 1125 * 1126 * Validates certain module parameters and updates 1127 * the associated values used by the driver (all asics). 1128 */ 1129 static int amdgpu_device_check_arguments(struct amdgpu_device *adev) 1130 { 1131 if (amdgpu_sched_jobs < 4) { 1132 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", 1133 amdgpu_sched_jobs); 1134 amdgpu_sched_jobs = 4; 1135 } else if (!is_power_of_2(amdgpu_sched_jobs)){ 1136 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", 1137 amdgpu_sched_jobs); 1138 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 1139 } 1140 1141 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { 1142 /* gart size must be greater or equal to 32M */ 1143 dev_warn(adev->dev, "gart size (%d) too small\n", 1144 amdgpu_gart_size); 1145 amdgpu_gart_size = -1; 1146 } 1147 1148 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { 1149 /* gtt size must be greater or equal to 32M */ 1150 dev_warn(adev->dev, "gtt size (%d) too small\n", 1151 amdgpu_gtt_size); 1152 amdgpu_gtt_size = -1; 1153 } 1154 1155 /* valid range is between 4 and 9 inclusive */ 1156 if (amdgpu_vm_fragment_size != -1 && 1157 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { 1158 dev_warn(adev->dev, "valid range is between 4 and 9\n"); 1159 amdgpu_vm_fragment_size = -1; 1160 } 1161 1162 amdgpu_device_check_smu_prv_buffer_size(adev); 1163 1164 amdgpu_device_check_vm_size(adev); 1165 1166 amdgpu_device_check_block_size(adev); 1167 1168 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 1169 1170 amdgpu_gmc_tmz_set(adev); 1171 1172 return 0; 1173 } 1174 1175 /** 1176 * amdgpu_switcheroo_set_state - set switcheroo state 1177 * 1178 * @pdev: pci dev pointer 1179 * @state: vga_switcheroo state 1180 * 1181 * Callback for the switcheroo driver. Suspends or resumes the 1182 * the asics before or after it is powered up using ACPI methods. 
1183 */ 1184 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) 1185 { 1186 struct drm_device *dev = pci_get_drvdata(pdev); 1187 int r; 1188 1189 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF) 1190 return; 1191 1192 if (state == VGA_SWITCHEROO_ON) { 1193 pr_info("switched on\n"); 1194 /* don't suspend or resume card normally */ 1195 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1196 1197 pci_set_power_state(dev->pdev, PCI_D0); 1198 pci_restore_state(dev->pdev); 1199 r = pci_enable_device(dev->pdev); 1200 if (r) 1201 DRM_WARN("pci_enable_device failed (%d)\n", r); 1202 amdgpu_device_resume(dev, true); 1203 1204 dev->switch_power_state = DRM_SWITCH_POWER_ON; 1205 drm_kms_helper_poll_enable(dev); 1206 } else { 1207 pr_info("switched off\n"); 1208 drm_kms_helper_poll_disable(dev); 1209 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1210 amdgpu_device_suspend(dev, true); 1211 pci_save_state(dev->pdev); 1212 /* Shut down the device */ 1213 pci_disable_device(dev->pdev); 1214 pci_set_power_state(dev->pdev, PCI_D3cold); 1215 dev->switch_power_state = DRM_SWITCH_POWER_OFF; 1216 } 1217 } 1218 1219 /** 1220 * amdgpu_switcheroo_can_switch - see if switcheroo state can change 1221 * 1222 * @pdev: pci dev pointer 1223 * 1224 * Callback for the switcheroo driver. Check of the switcheroo 1225 * state can be changed. 1226 * Returns true if the state can be changed, false if not. 1227 */ 1228 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) 1229 { 1230 struct drm_device *dev = pci_get_drvdata(pdev); 1231 1232 /* 1233 * FIXME: open_count is protected by drm_global_mutex but that would lead to 1234 * locking inversion with the driver load path. And the access here is 1235 * completely racy anyway. So don't bother with locking for now. 1236 */ 1237 return atomic_read(&dev->open_count) == 0; 1238 } 1239 1240 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { 1241 .set_gpu_state = amdgpu_switcheroo_set_state, 1242 .reprobe = NULL, 1243 .can_switch = amdgpu_switcheroo_can_switch, 1244 }; 1245 1246 /** 1247 * amdgpu_device_ip_set_clockgating_state - set the CG state 1248 * 1249 * @dev: amdgpu_device pointer 1250 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1251 * @state: clockgating state (gate or ungate) 1252 * 1253 * Sets the requested clockgating state for all instances of 1254 * the hardware IP specified. 1255 * Returns the error code from the last instance. 1256 */ 1257 int amdgpu_device_ip_set_clockgating_state(void *dev, 1258 enum amd_ip_block_type block_type, 1259 enum amd_clockgating_state state) 1260 { 1261 struct amdgpu_device *adev = dev; 1262 int i, r = 0; 1263 1264 for (i = 0; i < adev->num_ip_blocks; i++) { 1265 if (!adev->ip_blocks[i].status.valid) 1266 continue; 1267 if (adev->ip_blocks[i].version->type != block_type) 1268 continue; 1269 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) 1270 continue; 1271 r = adev->ip_blocks[i].version->funcs->set_clockgating_state( 1272 (void *)adev, state); 1273 if (r) 1274 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", 1275 adev->ip_blocks[i].version->funcs->name, r); 1276 } 1277 return r; 1278 } 1279 1280 /** 1281 * amdgpu_device_ip_set_powergating_state - set the PG state 1282 * 1283 * @dev: amdgpu_device pointer 1284 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 
1285 * @state: powergating state (gate or ungate) 1286 * 1287 * Sets the requested powergating state for all instances of 1288 * the hardware IP specified. 1289 * Returns the error code from the last instance. 1290 */ 1291 int amdgpu_device_ip_set_powergating_state(void *dev, 1292 enum amd_ip_block_type block_type, 1293 enum amd_powergating_state state) 1294 { 1295 struct amdgpu_device *adev = dev; 1296 int i, r = 0; 1297 1298 for (i = 0; i < adev->num_ip_blocks; i++) { 1299 if (!adev->ip_blocks[i].status.valid) 1300 continue; 1301 if (adev->ip_blocks[i].version->type != block_type) 1302 continue; 1303 if (!adev->ip_blocks[i].version->funcs->set_powergating_state) 1304 continue; 1305 r = adev->ip_blocks[i].version->funcs->set_powergating_state( 1306 (void *)adev, state); 1307 if (r) 1308 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", 1309 adev->ip_blocks[i].version->funcs->name, r); 1310 } 1311 return r; 1312 } 1313 1314 /** 1315 * amdgpu_device_ip_get_clockgating_state - get the CG state 1316 * 1317 * @adev: amdgpu_device pointer 1318 * @flags: clockgating feature flags 1319 * 1320 * Walks the list of IPs on the device and updates the clockgating 1321 * flags for each IP. 1322 * Updates @flags with the feature flags for each hardware IP where 1323 * clockgating is enabled. 1324 */ 1325 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, 1326 u32 *flags) 1327 { 1328 int i; 1329 1330 for (i = 0; i < adev->num_ip_blocks; i++) { 1331 if (!adev->ip_blocks[i].status.valid) 1332 continue; 1333 if (adev->ip_blocks[i].version->funcs->get_clockgating_state) 1334 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags); 1335 } 1336 } 1337 1338 /** 1339 * amdgpu_device_ip_wait_for_idle - wait for idle 1340 * 1341 * @adev: amdgpu_device pointer 1342 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1343 * 1344 * Waits for the request hardware IP to be idle. 1345 * Returns 0 for success or a negative error code on failure. 1346 */ 1347 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, 1348 enum amd_ip_block_type block_type) 1349 { 1350 int i, r; 1351 1352 for (i = 0; i < adev->num_ip_blocks; i++) { 1353 if (!adev->ip_blocks[i].status.valid) 1354 continue; 1355 if (adev->ip_blocks[i].version->type == block_type) { 1356 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev); 1357 if (r) 1358 return r; 1359 break; 1360 } 1361 } 1362 return 0; 1363 1364 } 1365 1366 /** 1367 * amdgpu_device_ip_is_idle - is the hardware IP idle 1368 * 1369 * @adev: amdgpu_device pointer 1370 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1371 * 1372 * Check if the hardware IP is idle or not. 1373 * Returns true if it the IP is idle, false if not. 1374 */ 1375 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, 1376 enum amd_ip_block_type block_type) 1377 { 1378 int i; 1379 1380 for (i = 0; i < adev->num_ip_blocks; i++) { 1381 if (!adev->ip_blocks[i].status.valid) 1382 continue; 1383 if (adev->ip_blocks[i].version->type == block_type) 1384 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev); 1385 } 1386 return true; 1387 1388 } 1389 1390 /** 1391 * amdgpu_device_ip_get_ip_block - get a hw IP pointer 1392 * 1393 * @adev: amdgpu_device pointer 1394 * @type: Type of hardware IP (SMU, GFX, UVD, etc.) 1395 * 1396 * Returns a pointer to the hardware IP block structure 1397 * if it exists for the asic, otherwise NULL. 
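 *
 * Example (illustrative sketch, not code from this file): combined with
 * amdgpu_device_ip_block_version_cmp() below, this can gate version-specific
 * behavior, e.g.
 *
 *   struct amdgpu_ip_block *gfx =
 *           amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *   bool has_gfx_8_1 = gfx &&
 *           !amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1);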
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block version is equal to or greater than the
 * requested version; returns 1 if it is smaller or the IP block doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
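 *
 * Example (illustrative PCI addresses): the string is a semicolon separated
 * list of "<pci address>,<number of crtcs>" entries, with "all" matching
 * every device and the crtc count clamped to the range 1..6, e.g.
 *
 *   virtual_display=0000:26:00.0,2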
1471 */ 1472 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) 1473 { 1474 adev->enable_virtual_display = false; 1475 1476 if (amdgpu_virtual_display) { 1477 struct drm_device *ddev = adev->ddev; 1478 const char *pci_address_name = pci_name(ddev->pdev); 1479 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname; 1480 1481 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL); 1482 pciaddstr_tmp = pciaddstr; 1483 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) { 1484 pciaddname = strsep(&pciaddname_tmp, ","); 1485 if (!strcmp("all", pciaddname) 1486 || !strcmp(pci_address_name, pciaddname)) { 1487 long num_crtc; 1488 int res = -1; 1489 1490 adev->enable_virtual_display = true; 1491 1492 if (pciaddname_tmp) 1493 res = kstrtol(pciaddname_tmp, 10, 1494 &num_crtc); 1495 1496 if (!res) { 1497 if (num_crtc < 1) 1498 num_crtc = 1; 1499 if (num_crtc > 6) 1500 num_crtc = 6; 1501 adev->mode_info.num_crtc = num_crtc; 1502 } else { 1503 adev->mode_info.num_crtc = 1; 1504 } 1505 break; 1506 } 1507 } 1508 1509 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", 1510 amdgpu_virtual_display, pci_address_name, 1511 adev->enable_virtual_display, adev->mode_info.num_crtc); 1512 1513 kfree(pciaddstr); 1514 } 1515 } 1516 1517 /** 1518 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware 1519 * 1520 * @adev: amdgpu_device pointer 1521 * 1522 * Parses the asic configuration parameters specified in the gpu info 1523 * firmware and makes them availale to the driver for use in configuring 1524 * the asic. 1525 * Returns 0 on success, -EINVAL on failure. 1526 */ 1527 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 1528 { 1529 const char *chip_name; 1530 char fw_name[30]; 1531 int err; 1532 const struct gpu_info_firmware_header_v1_0 *hdr; 1533 1534 adev->firmware.gpu_info_fw = NULL; 1535 1536 switch (adev->asic_type) { 1537 case CHIP_TOPAZ: 1538 case CHIP_TONGA: 1539 case CHIP_FIJI: 1540 case CHIP_POLARIS10: 1541 case CHIP_POLARIS11: 1542 case CHIP_POLARIS12: 1543 case CHIP_VEGAM: 1544 case CHIP_CARRIZO: 1545 case CHIP_STONEY: 1546 #ifdef CONFIG_DRM_AMDGPU_SI 1547 case CHIP_VERDE: 1548 case CHIP_TAHITI: 1549 case CHIP_PITCAIRN: 1550 case CHIP_OLAND: 1551 case CHIP_HAINAN: 1552 #endif 1553 #ifdef CONFIG_DRM_AMDGPU_CIK 1554 case CHIP_BONAIRE: 1555 case CHIP_HAWAII: 1556 case CHIP_KAVERI: 1557 case CHIP_KABINI: 1558 case CHIP_MULLINS: 1559 #endif 1560 case CHIP_VEGA20: 1561 default: 1562 return 0; 1563 case CHIP_VEGA10: 1564 chip_name = "vega10"; 1565 break; 1566 case CHIP_VEGA12: 1567 chip_name = "vega12"; 1568 break; 1569 case CHIP_RAVEN: 1570 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1571 chip_name = "raven2"; 1572 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1573 chip_name = "picasso"; 1574 else 1575 chip_name = "raven"; 1576 break; 1577 case CHIP_ARCTURUS: 1578 chip_name = "arcturus"; 1579 break; 1580 case CHIP_RENOIR: 1581 chip_name = "renoir"; 1582 break; 1583 case CHIP_NAVI10: 1584 chip_name = "navi10"; 1585 break; 1586 case CHIP_NAVI14: 1587 chip_name = "navi14"; 1588 break; 1589 case CHIP_NAVI12: 1590 chip_name = "navi12"; 1591 break; 1592 } 1593 1594 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); 1595 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); 1596 if (err) { 1597 dev_err(adev->dev, 1598 "Failed to load gpu_info firmware \"%s\"\n", 1599 fw_name); 1600 goto out; 1601 } 1602 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); 1603 if (err) { 1604 
dev_err(adev->dev, 1605 "Failed to validate gpu_info firmware \"%s\"\n", 1606 fw_name); 1607 goto out; 1608 } 1609 1610 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 1611 amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 1612 1613 switch (hdr->version_major) { 1614 case 1: 1615 { 1616 const struct gpu_info_firmware_v1_0 *gpu_info_fw = 1617 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 1618 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1619 1620 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { 1621 amdgpu_discovery_get_gfx_info(adev); 1622 goto parse_soc_bounding_box; 1623 } 1624 1625 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 1626 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 1627 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 1628 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 1629 adev->gfx.config.max_texture_channel_caches = 1630 le32_to_cpu(gpu_info_fw->gc_num_tccs); 1631 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 1632 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 1633 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 1634 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 1635 adev->gfx.config.double_offchip_lds_buf = 1636 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 1637 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 1638 adev->gfx.cu_info.max_waves_per_simd = 1639 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 1640 adev->gfx.cu_info.max_scratch_slots_per_cu = 1641 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 1642 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 1643 if (hdr->version_minor >= 1) { 1644 const struct gpu_info_firmware_v1_1 *gpu_info_fw = 1645 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 1646 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1647 adev->gfx.config.num_sc_per_sh = 1648 le32_to_cpu(gpu_info_fw->num_sc_per_sh); 1649 adev->gfx.config.num_packer_per_sc = 1650 le32_to_cpu(gpu_info_fw->num_packer_per_sc); 1651 } 1652 1653 parse_soc_bounding_box: 1654 /* 1655 * soc bounding box info is not integrated in disocovery table, 1656 * we always need to parse it from gpu info firmware. 1657 */ 1658 if (hdr->version_minor == 2) { 1659 const struct gpu_info_firmware_v1_2 *gpu_info_fw = 1660 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 1661 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1662 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 1663 } 1664 break; 1665 } 1666 default: 1667 dev_err(adev->dev, 1668 "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 1669 err = -EINVAL; 1670 goto out; 1671 } 1672 out: 1673 return err; 1674 } 1675 1676 /** 1677 * amdgpu_device_ip_early_init - run early init for hardware IPs 1678 * 1679 * @adev: amdgpu_device pointer 1680 * 1681 * Early initialization pass for hardware IPs. The hardware IPs that make 1682 * up each asic are discovered each IP's early_init callback is run. This 1683 * is the first stage in initializing the asic. 1684 * Returns 0 on success, negative error code on failure. 
1685 */ 1686 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 1687 { 1688 int i, r; 1689 1690 amdgpu_device_enable_virtual_display(adev); 1691 1692 switch (adev->asic_type) { 1693 case CHIP_TOPAZ: 1694 case CHIP_TONGA: 1695 case CHIP_FIJI: 1696 case CHIP_POLARIS10: 1697 case CHIP_POLARIS11: 1698 case CHIP_POLARIS12: 1699 case CHIP_VEGAM: 1700 case CHIP_CARRIZO: 1701 case CHIP_STONEY: 1702 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) 1703 adev->family = AMDGPU_FAMILY_CZ; 1704 else 1705 adev->family = AMDGPU_FAMILY_VI; 1706 1707 r = vi_set_ip_blocks(adev); 1708 if (r) 1709 return r; 1710 break; 1711 #ifdef CONFIG_DRM_AMDGPU_SI 1712 case CHIP_VERDE: 1713 case CHIP_TAHITI: 1714 case CHIP_PITCAIRN: 1715 case CHIP_OLAND: 1716 case CHIP_HAINAN: 1717 adev->family = AMDGPU_FAMILY_SI; 1718 r = si_set_ip_blocks(adev); 1719 if (r) 1720 return r; 1721 break; 1722 #endif 1723 #ifdef CONFIG_DRM_AMDGPU_CIK 1724 case CHIP_BONAIRE: 1725 case CHIP_HAWAII: 1726 case CHIP_KAVERI: 1727 case CHIP_KABINI: 1728 case CHIP_MULLINS: 1729 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII)) 1730 adev->family = AMDGPU_FAMILY_CI; 1731 else 1732 adev->family = AMDGPU_FAMILY_KV; 1733 1734 r = cik_set_ip_blocks(adev); 1735 if (r) 1736 return r; 1737 break; 1738 #endif 1739 case CHIP_VEGA10: 1740 case CHIP_VEGA12: 1741 case CHIP_VEGA20: 1742 case CHIP_RAVEN: 1743 case CHIP_ARCTURUS: 1744 case CHIP_RENOIR: 1745 if (adev->asic_type == CHIP_RAVEN || 1746 adev->asic_type == CHIP_RENOIR) 1747 adev->family = AMDGPU_FAMILY_RV; 1748 else 1749 adev->family = AMDGPU_FAMILY_AI; 1750 1751 r = soc15_set_ip_blocks(adev); 1752 if (r) 1753 return r; 1754 break; 1755 case CHIP_NAVI10: 1756 case CHIP_NAVI14: 1757 case CHIP_NAVI12: 1758 adev->family = AMDGPU_FAMILY_NV; 1759 1760 r = nv_set_ip_blocks(adev); 1761 if (r) 1762 return r; 1763 break; 1764 default: 1765 /* FIXME: not supported yet */ 1766 return -EINVAL; 1767 } 1768 1769 amdgpu_amdkfd_device_probe(adev); 1770 1771 if (amdgpu_sriov_vf(adev)) { 1772 /* handle vbios stuff prior full access mode for new handshake */ 1773 if (adev->virt.req_init_data_ver == 1) { 1774 if (!amdgpu_get_bios(adev)) { 1775 DRM_ERROR("failed to get vbios\n"); 1776 return -EINVAL; 1777 } 1778 1779 r = amdgpu_atombios_init(adev); 1780 if (r) { 1781 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 1782 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 1783 return r; 1784 } 1785 } 1786 } 1787 1788 /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios 1789 * will not be prepared by host for this VF */ 1790 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) { 1791 r = amdgpu_virt_request_full_gpu(adev, true); 1792 if (r) 1793 return r; 1794 } 1795 1796 adev->pm.pp_feature = amdgpu_pp_feature_mask; 1797 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) 1798 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1799 1800 for (i = 0; i < adev->num_ip_blocks; i++) { 1801 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 1802 DRM_ERROR("disabled ip block: %d <%s>\n", 1803 i, adev->ip_blocks[i].version->funcs->name); 1804 adev->ip_blocks[i].status.valid = false; 1805 } else { 1806 if (adev->ip_blocks[i].version->funcs->early_init) { 1807 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 1808 if (r == -ENOENT) { 1809 adev->ip_blocks[i].status.valid = false; 1810 } else if (r) { 1811 DRM_ERROR("early_init of IP block <%s> failed %d\n", 1812 adev->ip_blocks[i].version->funcs->name, r); 
1813 return r; 1814 } else { 1815 adev->ip_blocks[i].status.valid = true; 1816 } 1817 } else { 1818 adev->ip_blocks[i].status.valid = true; 1819 } 1820 } 1821 /* get the vbios after the asic_funcs are set up */ 1822 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 1823 r = amdgpu_device_parse_gpu_info_fw(adev); 1824 if (r) 1825 return r; 1826 1827 /* skip vbios handling for new handshake */ 1828 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1) 1829 continue; 1830 1831 /* Read BIOS */ 1832 if (!amdgpu_get_bios(adev)) 1833 return -EINVAL; 1834 1835 r = amdgpu_atombios_init(adev); 1836 if (r) { 1837 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 1838 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 1839 return r; 1840 } 1841 } 1842 } 1843 1844 adev->cg_flags &= amdgpu_cg_mask; 1845 adev->pg_flags &= amdgpu_pg_mask; 1846 1847 return 0; 1848 } 1849 1850 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 1851 { 1852 int i, r; 1853 1854 for (i = 0; i < adev->num_ip_blocks; i++) { 1855 if (!adev->ip_blocks[i].status.sw) 1856 continue; 1857 if (adev->ip_blocks[i].status.hw) 1858 continue; 1859 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 1860 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || 1861 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 1862 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1863 if (r) { 1864 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1865 adev->ip_blocks[i].version->funcs->name, r); 1866 return r; 1867 } 1868 adev->ip_blocks[i].status.hw = true; 1869 } 1870 } 1871 1872 return 0; 1873 } 1874 1875 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 1876 { 1877 int i, r; 1878 1879 for (i = 0; i < adev->num_ip_blocks; i++) { 1880 if (!adev->ip_blocks[i].status.sw) 1881 continue; 1882 if (adev->ip_blocks[i].status.hw) 1883 continue; 1884 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1885 if (r) { 1886 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1887 adev->ip_blocks[i].version->funcs->name, r); 1888 return r; 1889 } 1890 adev->ip_blocks[i].status.hw = true; 1891 } 1892 1893 return 0; 1894 } 1895 1896 static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 1897 { 1898 int r = 0; 1899 int i; 1900 uint32_t smu_version; 1901 1902 if (adev->asic_type >= CHIP_VEGA10) { 1903 for (i = 0; i < adev->num_ip_blocks; i++) { 1904 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) 1905 continue; 1906 1907 /* no need to do the fw loading again if already done*/ 1908 if (adev->ip_blocks[i].status.hw == true) 1909 break; 1910 1911 if (adev->in_gpu_reset || adev->in_suspend) { 1912 r = adev->ip_blocks[i].version->funcs->resume(adev); 1913 if (r) { 1914 DRM_ERROR("resume of IP block <%s> failed %d\n", 1915 adev->ip_blocks[i].version->funcs->name, r); 1916 return r; 1917 } 1918 } else { 1919 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1920 if (r) { 1921 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1922 adev->ip_blocks[i].version->funcs->name, r); 1923 return r; 1924 } 1925 } 1926 1927 adev->ip_blocks[i].status.hw = true; 1928 break; 1929 } 1930 } 1931 1932 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) 1933 r = amdgpu_pm_load_smu_firmware(adev, &smu_version); 1934 1935 return r; 1936 } 1937 1938 /** 1939 * amdgpu_device_ip_init - run init for hardware IPs 1940 * 1941 * @adev: amdgpu_device pointer 1942 * 1943 * Main initialization pass for hardware IPs. 
The list of all the hardware 1944 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 1945 * are run. sw_init initializes the software state associated with each IP 1946 * and hw_init initializes the hardware associated with each IP. 1947 * Returns 0 on success, negative error code on failure. 1948 */ 1949 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 1950 { 1951 int i, r; 1952 1953 r = amdgpu_ras_init(adev); 1954 if (r) 1955 return r; 1956 1957 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { 1958 r = amdgpu_virt_request_full_gpu(adev, true); 1959 if (r) 1960 return -EAGAIN; 1961 } 1962 1963 for (i = 0; i < adev->num_ip_blocks; i++) { 1964 if (!adev->ip_blocks[i].status.valid) 1965 continue; 1966 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 1967 if (r) { 1968 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 1969 adev->ip_blocks[i].version->funcs->name, r); 1970 goto init_failed; 1971 } 1972 adev->ip_blocks[i].status.sw = true; 1973 1974 /* need to do gmc hw init early so we can allocate gpu mem */ 1975 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 1976 r = amdgpu_device_vram_scratch_init(adev); 1977 if (r) { 1978 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); 1979 goto init_failed; 1980 } 1981 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 1982 if (r) { 1983 DRM_ERROR("hw_init %d failed %d\n", i, r); 1984 goto init_failed; 1985 } 1986 r = amdgpu_device_wb_init(adev); 1987 if (r) { 1988 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 1989 goto init_failed; 1990 } 1991 adev->ip_blocks[i].status.hw = true; 1992 1993 /* right after GMC hw init, we create CSA */ 1994 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { 1995 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 1996 AMDGPU_GEM_DOMAIN_VRAM, 1997 AMDGPU_CSA_SIZE); 1998 if (r) { 1999 DRM_ERROR("allocate CSA failed %d\n", r); 2000 goto init_failed; 2001 } 2002 } 2003 } 2004 } 2005 2006 if (amdgpu_sriov_vf(adev)) 2007 amdgpu_virt_init_data_exchange(adev); 2008 2009 r = amdgpu_ib_pool_init(adev); 2010 if (r) { 2011 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2012 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2013 goto init_failed; 2014 } 2015 2016 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2017 if (r) 2018 goto init_failed; 2019 2020 r = amdgpu_device_ip_hw_init_phase1(adev); 2021 if (r) 2022 goto init_failed; 2023 2024 r = amdgpu_device_fw_loading(adev); 2025 if (r) 2026 goto init_failed; 2027 2028 r = amdgpu_device_ip_hw_init_phase2(adev); 2029 if (r) 2030 goto init_failed; 2031 2032 /* 2033 * retired pages will be loaded from eeprom and reserved here, 2034 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2035 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2036 * for I2C communication which only true at this point. 2037 * recovery_init may fail, but it can free all resources allocated by 2038 * itself and its failure should not stop amdgpu init process. 
2039 * 2040 * Note: theoretically, this should be called before all vram allocations 2041 * to protect retired pages from being reused 2042 */ 2043 amdgpu_ras_recovery_init(adev); 2044 2045 if (adev->gmc.xgmi.num_physical_nodes > 1) 2046 amdgpu_xgmi_add_device(adev); 2047 amdgpu_amdkfd_device_init(adev); 2048 2049 amdgpu_fru_get_product_info(adev); 2050 2051 init_failed: 2052 if (amdgpu_sriov_vf(adev)) 2053 amdgpu_virt_release_full_gpu(adev, true); 2054 2055 return r; 2056 } 2057 2058 /** 2059 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2060 * 2061 * @adev: amdgpu_device pointer 2062 * 2063 * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2064 * this function before a GPU reset. If the value is retained after a 2065 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. 2066 */ 2067 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2068 { 2069 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2070 } 2071 2072 /** 2073 * amdgpu_device_check_vram_lost - check if vram is valid 2074 * 2075 * @adev: amdgpu_device pointer 2076 * 2077 * Checks the reset magic value written to the gart pointer in VRAM. 2078 * The driver calls this after a GPU reset to see if the contents of 2079 * VRAM were lost or not. 2080 * Returns true if vram is lost, false if not. 2081 */ 2082 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2083 { 2084 if (memcmp(adev->gart.ptr, adev->reset_magic, 2085 AMDGPU_RESET_MAGIC_NUM)) 2086 return true; 2087 2088 if (!adev->in_gpu_reset) 2089 return false; 2090 2091 /* 2092 * For all ASICs with baco/mode1 reset, the VRAM is 2093 * always assumed to be lost. 2094 */ 2095 switch (amdgpu_asic_reset_method(adev)) { 2096 case AMD_RESET_METHOD_BACO: 2097 case AMD_RESET_METHOD_MODE1: 2098 return true; 2099 default: 2100 return false; 2101 } 2102 } 2103 2104 /** 2105 * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2106 * 2107 * @adev: amdgpu_device pointer 2108 * @state: clockgating state (gate or ungate) 2109 * 2110 * The list of all the hardware IPs that make up the asic is walked and the 2111 * set_clockgating_state callbacks are run. 2112 * During late init this is used to enable clockgating for hardware IPs; 2113 * during fini or suspend it is used to disable it. 2114 * Returns 0 on success, negative error code on failure. 2115 */ 2116 2117 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2118 enum amd_clockgating_state state) 2119 { 2120 int i, j, r; 2121 2122 if (amdgpu_emu_mode == 1) 2123 return 0; 2124 2125 for (j = 0; j < adev->num_ip_blocks; j++) { 2126 i = state == AMD_CG_STATE_GATE ?
j : adev->num_ip_blocks - j - 1; 2127 if (!adev->ip_blocks[i].status.late_initialized) 2128 continue; 2129 /* skip CG for VCE/UVD, it's handled specially */ 2130 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2131 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2132 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2133 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2134 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2135 /* enable clockgating to save power */ 2136 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2137 state); 2138 if (r) { 2139 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2140 adev->ip_blocks[i].version->funcs->name, r); 2141 return r; 2142 } 2143 } 2144 } 2145 2146 return 0; 2147 } 2148 2149 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state) 2150 { 2151 int i, j, r; 2152 2153 if (amdgpu_emu_mode == 1) 2154 return 0; 2155 2156 for (j = 0; j < adev->num_ip_blocks; j++) { 2157 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2158 if (!adev->ip_blocks[i].status.late_initialized) 2159 continue; 2160 /* skip CG for VCE/UVD, it's handled specially */ 2161 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2162 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2163 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2164 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2165 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2166 /* enable powergating to save power */ 2167 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2168 state); 2169 if (r) { 2170 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2171 adev->ip_blocks[i].version->funcs->name, r); 2172 return r; 2173 } 2174 } 2175 } 2176 return 0; 2177 } 2178 2179 static int amdgpu_device_enable_mgpu_fan_boost(void) 2180 { 2181 struct amdgpu_gpu_instance *gpu_ins; 2182 struct amdgpu_device *adev; 2183 int i, ret = 0; 2184 2185 mutex_lock(&mgpu_info.mutex); 2186 2187 /* 2188 * MGPU fan boost feature should be enabled 2189 * only when there are two or more dGPUs in 2190 * the system 2191 */ 2192 if (mgpu_info.num_dgpu < 2) 2193 goto out; 2194 2195 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2196 gpu_ins = &(mgpu_info.gpu_ins[i]); 2197 adev = gpu_ins->adev; 2198 if (!(adev->flags & AMD_IS_APU) && 2199 !gpu_ins->mgpu_fan_enabled && 2200 adev->powerplay.pp_funcs && 2201 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { 2202 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2203 if (ret) 2204 break; 2205 2206 gpu_ins->mgpu_fan_enabled = 1; 2207 } 2208 } 2209 2210 out: 2211 mutex_unlock(&mgpu_info.mutex); 2212 2213 return ret; 2214 } 2215 2216 /** 2217 * amdgpu_device_ip_late_init - run late init for hardware IPs 2218 * 2219 * @adev: amdgpu_device pointer 2220 * 2221 * Late initialization pass for hardware IPs. The list of all the hardware 2222 * IPs that make up the asic is walked and the late_init callbacks are run. 2223 * late_init covers any special initialization that an IP requires 2224 * after all of the have been initialized or something that needs to happen 2225 * late in the init process. 2226 * Returns 0 on success, negative error code on failure. 
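 *
 * Besides running the per-IP late_init callbacks, this pass (see the body
 * below) also marks RAS error queries as ready, enables clockgating and
 * powergating, records the VRAM reset magic, and on multi-GPU boards
 * enables fan boost and drops the XGMI p-state once the whole hive has
 * initialized.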
2227 */ 2228 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2229 { 2230 struct amdgpu_gpu_instance *gpu_instance; 2231 int i = 0, r; 2232 2233 for (i = 0; i < adev->num_ip_blocks; i++) { 2234 if (!adev->ip_blocks[i].status.hw) 2235 continue; 2236 if (adev->ip_blocks[i].version->funcs->late_init) { 2237 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2238 if (r) { 2239 DRM_ERROR("late_init of IP block <%s> failed %d\n", 2240 adev->ip_blocks[i].version->funcs->name, r); 2241 return r; 2242 } 2243 } 2244 adev->ip_blocks[i].status.late_initialized = true; 2245 } 2246 2247 amdgpu_ras_set_error_query_ready(adev, true); 2248 2249 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2250 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2251 2252 amdgpu_device_fill_reset_magic(adev); 2253 2254 r = amdgpu_device_enable_mgpu_fan_boost(); 2255 if (r) 2256 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2257 2258 2259 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2260 mutex_lock(&mgpu_info.mutex); 2261 2262 /* 2263 * Reset the device p-state to low, since it was booted with a high p-state. 2264 * 2265 * This should be performed only after all devices from the same 2266 * hive get initialized. 2267 * 2268 * However, the number of devices in the hive is not known in advance; 2269 * it is counted up one by one as the devices initialize. 2270 * 2271 * So we wait until all XGMI-interlinked devices have initialized. 2272 * This may bring some delays as those devices may come from 2273 * different hives. But that should be OK. 2274 */ 2275 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2276 for (i = 0; i < mgpu_info.num_gpu; i++) { 2277 gpu_instance = &(mgpu_info.gpu_ins[i]); 2278 if (gpu_instance->adev->flags & AMD_IS_APU) 2279 continue; 2280 2281 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2282 AMDGPU_XGMI_PSTATE_MIN); 2283 if (r) { 2284 DRM_ERROR("pstate setting failed (%d).\n", r); 2285 break; 2286 } 2287 } 2288 } 2289 2290 mutex_unlock(&mgpu_info.mutex); 2291 } 2292 2293 return 0; 2294 } 2295 2296 /** 2297 * amdgpu_device_ip_fini - run fini for hardware IPs 2298 * 2299 * @adev: amdgpu_device pointer 2300 * 2301 * Main teardown pass for hardware IPs. The list of all the hardware 2302 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2303 * are run. hw_fini tears down the hardware associated with each IP 2304 * and sw_fini tears down any software state associated with each IP. 2305 * Returns 0 on success, negative error code on failure.
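 *
 * As the body below shows, teardown runs roughly in reverse order of init:
 * the SMC block is brought down first, then hw_fini and sw_fini walk the
 * IP list back to front (freeing the ucode BO, CSA, writeback and VRAM
 * scratch at the GMC step), and late_fini runs last.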
2306 */ 2307 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2308 { 2309 int i, r; 2310 2311 amdgpu_ras_pre_fini(adev); 2312 2313 if (adev->gmc.xgmi.num_physical_nodes > 1) 2314 amdgpu_xgmi_remove_device(adev); 2315 2316 amdgpu_amdkfd_device_fini(adev); 2317 2318 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2319 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2320 2321 /* need to disable SMC first */ 2322 for (i = 0; i < adev->num_ip_blocks; i++) { 2323 if (!adev->ip_blocks[i].status.hw) 2324 continue; 2325 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2326 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2327 /* XXX handle errors */ 2328 if (r) { 2329 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2330 adev->ip_blocks[i].version->funcs->name, r); 2331 } 2332 adev->ip_blocks[i].status.hw = false; 2333 break; 2334 } 2335 } 2336 2337 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2338 if (!adev->ip_blocks[i].status.hw) 2339 continue; 2340 2341 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2342 /* XXX handle errors */ 2343 if (r) { 2344 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2345 adev->ip_blocks[i].version->funcs->name, r); 2346 } 2347 2348 adev->ip_blocks[i].status.hw = false; 2349 } 2350 2351 2352 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2353 if (!adev->ip_blocks[i].status.sw) 2354 continue; 2355 2356 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2357 amdgpu_ucode_free_bo(adev); 2358 amdgpu_free_static_csa(&adev->virt.csa_obj); 2359 amdgpu_device_wb_fini(adev); 2360 amdgpu_device_vram_scratch_fini(adev); 2361 amdgpu_ib_pool_fini(adev); 2362 } 2363 2364 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2365 /* XXX handle errors */ 2366 if (r) { 2367 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2368 adev->ip_blocks[i].version->funcs->name, r); 2369 } 2370 adev->ip_blocks[i].status.sw = false; 2371 adev->ip_blocks[i].status.valid = false; 2372 } 2373 2374 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2375 if (!adev->ip_blocks[i].status.late_initialized) 2376 continue; 2377 if (adev->ip_blocks[i].version->funcs->late_fini) 2378 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2379 adev->ip_blocks[i].status.late_initialized = false; 2380 } 2381 2382 amdgpu_ras_fini(adev); 2383 2384 if (amdgpu_sriov_vf(adev)) 2385 if (amdgpu_virt_release_full_gpu(adev, false)) 2386 DRM_ERROR("failed to release exclusive mode on fini\n"); 2387 2388 return 0; 2389 } 2390 2391 /** 2392 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2393 * 2394 * @work: work_struct. 
2395 */ 2396 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2397 { 2398 struct amdgpu_device *adev = 2399 container_of(work, struct amdgpu_device, delayed_init_work.work); 2400 int r; 2401 2402 r = amdgpu_ib_ring_tests(adev); 2403 if (r) 2404 DRM_ERROR("ib ring test failed (%d).\n", r); 2405 } 2406 2407 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2408 { 2409 struct amdgpu_device *adev = 2410 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2411 2412 mutex_lock(&adev->gfx.gfx_off_mutex); 2413 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 2414 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2415 adev->gfx.gfx_off_state = true; 2416 } 2417 mutex_unlock(&adev->gfx.gfx_off_mutex); 2418 } 2419 2420 /** 2421 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2422 * 2423 * @adev: amdgpu_device pointer 2424 * 2425 * Main suspend function for hardware IPs. The list of all the hardware 2426 * IPs that make up the asic is walked, clockgating is disabled and the 2427 * suspend callbacks are run. suspend puts the hardware and software state 2428 * in each IP into a state suitable for suspend. 2429 * Returns 0 on success, negative error code on failure. 2430 */ 2431 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2432 { 2433 int i, r; 2434 2435 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2436 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2437 2438 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2439 if (!adev->ip_blocks[i].status.valid) 2440 continue; 2441 /* displays are handled separately */ 2442 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2443 /* XXX handle errors */ 2444 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2445 /* XXX handle errors */ 2446 if (r) { 2447 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2448 adev->ip_blocks[i].version->funcs->name, r); 2449 return r; 2450 } 2451 adev->ip_blocks[i].status.hw = false; 2452 } 2453 } 2454 2455 return 0; 2456 } 2457 2458 /** 2459 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2460 * 2461 * @adev: amdgpu_device pointer 2462 * 2463 * Main suspend function for hardware IPs. The list of all the hardware 2464 * IPs that make up the asic is walked, clockgating is disabled and the 2465 * suspend callbacks are run. suspend puts the hardware and software state 2466 * in each IP into a state suitable for suspend. 2467 * Returns 0 on success, negative error code on failure. 
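 *
 * In the full suspend path (amdgpu_device_suspend below), phase 1 handles
 * the display blocks first; KFD suspend, VRAM eviction and fence driver
 * suspend happen in between, and this phase 2 then suspends the remaining
 * blocks.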
2468 */ 2469 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2470 { 2471 int i, r; 2472 2473 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2474 if (!adev->ip_blocks[i].status.valid) 2475 continue; 2476 /* displays are handled in phase1 */ 2477 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2478 continue; 2479 /* PSP lost connection when err_event_athub occurs */ 2480 if (amdgpu_ras_intr_triggered() && 2481 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2482 adev->ip_blocks[i].status.hw = false; 2483 continue; 2484 } 2485 /* XXX handle errors */ 2486 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2487 /* XXX handle errors */ 2488 if (r) { 2489 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2490 adev->ip_blocks[i].version->funcs->name, r); 2491 } 2492 adev->ip_blocks[i].status.hw = false; 2493 /* handle putting the SMC in the appropriate state */ 2494 if(!amdgpu_sriov_vf(adev)){ 2495 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2496 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 2497 if (r) { 2498 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 2499 adev->mp1_state, r); 2500 return r; 2501 } 2502 } 2503 } 2504 adev->ip_blocks[i].status.hw = false; 2505 } 2506 2507 return 0; 2508 } 2509 2510 /** 2511 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2512 * 2513 * @adev: amdgpu_device pointer 2514 * 2515 * Main suspend function for hardware IPs. The list of all the hardware 2516 * IPs that make up the asic is walked, clockgating is disabled and the 2517 * suspend callbacks are run. suspend puts the hardware and software state 2518 * in each IP into a state suitable for suspend. 2519 * Returns 0 on success, negative error code on failure. 2520 */ 2521 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2522 { 2523 int r; 2524 2525 if (amdgpu_sriov_vf(adev)) 2526 amdgpu_virt_request_full_gpu(adev, false); 2527 2528 r = amdgpu_device_ip_suspend_phase1(adev); 2529 if (r) 2530 return r; 2531 r = amdgpu_device_ip_suspend_phase2(adev); 2532 2533 if (amdgpu_sriov_vf(adev)) 2534 amdgpu_virt_release_full_gpu(adev, false); 2535 2536 return r; 2537 } 2538 2539 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2540 { 2541 int i, r; 2542 2543 static enum amd_ip_block_type ip_order[] = { 2544 AMD_IP_BLOCK_TYPE_GMC, 2545 AMD_IP_BLOCK_TYPE_COMMON, 2546 AMD_IP_BLOCK_TYPE_PSP, 2547 AMD_IP_BLOCK_TYPE_IH, 2548 }; 2549 2550 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2551 int j; 2552 struct amdgpu_ip_block *block; 2553 2554 for (j = 0; j < adev->num_ip_blocks; j++) { 2555 block = &adev->ip_blocks[j]; 2556 2557 block->status.hw = false; 2558 if (block->version->type != ip_order[i] || 2559 !block->status.valid) 2560 continue; 2561 2562 r = block->version->funcs->hw_init(adev); 2563 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2564 if (r) 2565 return r; 2566 block->status.hw = true; 2567 } 2568 } 2569 2570 return 0; 2571 } 2572 2573 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2574 { 2575 int i, r; 2576 2577 static enum amd_ip_block_type ip_order[] = { 2578 AMD_IP_BLOCK_TYPE_SMC, 2579 AMD_IP_BLOCK_TYPE_DCE, 2580 AMD_IP_BLOCK_TYPE_GFX, 2581 AMD_IP_BLOCK_TYPE_SDMA, 2582 AMD_IP_BLOCK_TYPE_UVD, 2583 AMD_IP_BLOCK_TYPE_VCE, 2584 AMD_IP_BLOCK_TYPE_VCN 2585 }; 2586 2587 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2588 int j; 2589 struct amdgpu_ip_block *block; 2590 2591 for (j = 0; j < adev->num_ip_blocks; j++) { 2592 
block = &adev->ip_blocks[j]; 2593 2594 if (block->version->type != ip_order[i] || 2595 !block->status.valid || 2596 block->status.hw) 2597 continue; 2598 2599 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 2600 r = block->version->funcs->resume(adev); 2601 else 2602 r = block->version->funcs->hw_init(adev); 2603 2604 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2605 if (r) 2606 return r; 2607 block->status.hw = true; 2608 } 2609 } 2610 2611 return 0; 2612 } 2613 2614 /** 2615 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2616 * 2617 * @adev: amdgpu_device pointer 2618 * 2619 * First resume function for hardware IPs. The list of all the hardware 2620 * IPs that make up the asic is walked and the resume callbacks are run for 2621 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2622 * after a suspend and updates the software state as necessary. This 2623 * function is also used for restoring the GPU after a GPU reset. 2624 * Returns 0 on success, negative error code on failure. 2625 */ 2626 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 2627 { 2628 int i, r; 2629 2630 for (i = 0; i < adev->num_ip_blocks; i++) { 2631 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2632 continue; 2633 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2634 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2635 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2636 2637 r = adev->ip_blocks[i].version->funcs->resume(adev); 2638 if (r) { 2639 DRM_ERROR("resume of IP block <%s> failed %d\n", 2640 adev->ip_blocks[i].version->funcs->name, r); 2641 return r; 2642 } 2643 adev->ip_blocks[i].status.hw = true; 2644 } 2645 } 2646 2647 return 0; 2648 } 2649 2650 /** 2651 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 2652 * 2653 * @adev: amdgpu_device pointer 2654 * 2655 * First resume function for hardware IPs. The list of all the hardware 2656 * IPs that make up the asic is walked and the resume callbacks are run for 2657 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 2658 * functional state after a suspend and updates the software state as 2659 * necessary. This function is also used for restoring the GPU after a GPU 2660 * reset. 2661 * Returns 0 on success, negative error code on failure. 2662 */ 2663 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2664 { 2665 int i, r; 2666 2667 for (i = 0; i < adev->num_ip_blocks; i++) { 2668 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2669 continue; 2670 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2671 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2672 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 2673 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 2674 continue; 2675 r = adev->ip_blocks[i].version->funcs->resume(adev); 2676 if (r) { 2677 DRM_ERROR("resume of IP block <%s> failed %d\n", 2678 adev->ip_blocks[i].version->funcs->name, r); 2679 return r; 2680 } 2681 adev->ip_blocks[i].status.hw = true; 2682 } 2683 2684 return 0; 2685 } 2686 2687 /** 2688 * amdgpu_device_ip_resume - run resume for hardware IPs 2689 * 2690 * @adev: amdgpu_device pointer 2691 * 2692 * Main resume function for hardware IPs. 
The hardware IPs 2693 * are split into two resume functions because they are 2694 * also used in recovering from a GPU reset, and some additional 2695 * steps need to be taken between them. In this case (S3/S4) they are 2696 * run sequentially. 2697 * Returns 0 on success, negative error code on failure. 2698 */ 2699 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2700 { 2701 int r; 2702 2703 r = amdgpu_device_ip_resume_phase1(adev); 2704 if (r) 2705 return r; 2706 2707 r = amdgpu_device_fw_loading(adev); 2708 if (r) 2709 return r; 2710 2711 r = amdgpu_device_ip_resume_phase2(adev); 2712 2713 return r; 2714 } 2715 2716 /** 2717 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2718 * 2719 * @adev: amdgpu_device pointer 2720 * 2721 * Query the VBIOS data tables to determine if the board supports SR-IOV. 2722 */ 2723 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2724 { 2725 if (amdgpu_sriov_vf(adev)) { 2726 if (adev->is_atom_fw) { 2727 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2728 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2729 } else { 2730 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2731 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2732 } 2733 2734 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2735 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2736 } 2737 } 2738 2739 /** 2740 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2741 * 2742 * @asic_type: AMD asic type 2743 * 2744 * Check if there is DC (new modesetting infrastructure) support for an asic. 2745 * Returns true if DC has support, false if not. 2746 */ 2747 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2748 { 2749 switch (asic_type) { 2750 #if defined(CONFIG_DRM_AMD_DC) 2751 case CHIP_BONAIRE: 2752 case CHIP_KAVERI: 2753 case CHIP_KABINI: 2754 case CHIP_MULLINS: 2755 /* 2756 * We have systems in the wild with these ASICs that require 2757 * LVDS and VGA support which is not supported with DC. 2758 * 2759 * Fallback to the non-DC driver here by default so as not to 2760 * cause regressions.
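 *
 * DC can still be forced on for these chips with the dc=1 module
 * parameter, which is what the "amdgpu_dc > 0" check below honours.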
2761 */ 2762 return amdgpu_dc > 0; 2763 case CHIP_HAWAII: 2764 case CHIP_CARRIZO: 2765 case CHIP_STONEY: 2766 case CHIP_POLARIS10: 2767 case CHIP_POLARIS11: 2768 case CHIP_POLARIS12: 2769 case CHIP_VEGAM: 2770 case CHIP_TONGA: 2771 case CHIP_FIJI: 2772 case CHIP_VEGA10: 2773 case CHIP_VEGA12: 2774 case CHIP_VEGA20: 2775 #if defined(CONFIG_DRM_AMD_DC_DCN) 2776 case CHIP_RAVEN: 2777 case CHIP_NAVI10: 2778 case CHIP_NAVI14: 2779 case CHIP_NAVI12: 2780 case CHIP_RENOIR: 2781 #endif 2782 return amdgpu_dc != 0; 2783 #endif 2784 default: 2785 if (amdgpu_dc > 0) 2786 DRM_INFO("Display Core has been requested via kernel parameter " 2787 "but isn't supported by ASIC, ignoring\n"); 2788 return false; 2789 } 2790 } 2791 2792 /** 2793 * amdgpu_device_has_dc_support - check if dc is supported 2794 * 2795 * @adev: amdgpu_device_pointer 2796 * 2797 * Returns true for supported, false for not supported 2798 */ 2799 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2800 { 2801 if (amdgpu_sriov_vf(adev)) 2802 return false; 2803 2804 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2805 } 2806 2807 2808 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 2809 { 2810 struct amdgpu_device *adev = 2811 container_of(__work, struct amdgpu_device, xgmi_reset_work); 2812 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); 2813 2814 /* It's a bug to not have a hive within this function */ 2815 if (WARN_ON(!hive)) 2816 return; 2817 2818 /* 2819 * Use task barrier to synchronize all xgmi reset works across the 2820 * hive. task_barrier_enter and task_barrier_exit will block 2821 * until all the threads running the xgmi reset works reach 2822 * those points. task_barrier_full will do both blocks. 2823 */ 2824 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 2825 2826 task_barrier_enter(&hive->tb); 2827 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); 2828 2829 if (adev->asic_reset_res) 2830 goto fail; 2831 2832 task_barrier_exit(&hive->tb); 2833 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); 2834 2835 if (adev->asic_reset_res) 2836 goto fail; 2837 2838 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) 2839 adev->mmhub.funcs->reset_ras_error_count(adev); 2840 } else { 2841 2842 task_barrier_full(&hive->tb); 2843 adev->asic_reset_res = amdgpu_asic_reset(adev); 2844 } 2845 2846 fail: 2847 if (adev->asic_reset_res) 2848 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 2849 adev->asic_reset_res, adev->ddev->unique); 2850 } 2851 2852 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 2853 { 2854 char *input = amdgpu_lockup_timeout; 2855 char *timeout_setting = NULL; 2856 int index = 0; 2857 long timeout; 2858 int ret = 0; 2859 2860 /* 2861 * By default timeout for non compute jobs is 10000. 2862 * And there is no timeout enforced on compute jobs. 2863 * In SR-IOV or passthrough mode, timeout for compute 2864 * jobs are 60000 by default. 
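 *
 * For example, amdgpu.lockup_timeout is parsed below as a comma separated
 * list in the order [GFX, Compute, SDMA, Video]: a value of 0 keeps the
 * default, a negative value disables the timeout, and a single value is
 * applied to all non-compute queues. So lockup_timeout=10000,60000 gives a
 * 10 second GFX timeout and a 60 second compute timeout.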
2865 */ 2866 adev->gfx_timeout = msecs_to_jiffies(10000); 2867 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2868 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2869 adev->compute_timeout = msecs_to_jiffies(60000); 2870 else 2871 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; 2872 2873 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2874 while ((timeout_setting = strsep(&input, ",")) && 2875 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2876 ret = kstrtol(timeout_setting, 0, &timeout); 2877 if (ret) 2878 return ret; 2879 2880 if (timeout == 0) { 2881 index++; 2882 continue; 2883 } else if (timeout < 0) { 2884 timeout = MAX_SCHEDULE_TIMEOUT; 2885 } else { 2886 timeout = msecs_to_jiffies(timeout); 2887 } 2888 2889 switch (index++) { 2890 case 0: 2891 adev->gfx_timeout = timeout; 2892 break; 2893 case 1: 2894 adev->compute_timeout = timeout; 2895 break; 2896 case 2: 2897 adev->sdma_timeout = timeout; 2898 break; 2899 case 3: 2900 adev->video_timeout = timeout; 2901 break; 2902 default: 2903 break; 2904 } 2905 } 2906 /* 2907 * There is only one value specified and 2908 * it should apply to all non-compute jobs. 2909 */ 2910 if (index == 1) { 2911 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2912 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2913 adev->compute_timeout = adev->gfx_timeout; 2914 } 2915 } 2916 2917 return ret; 2918 } 2919 2920 static const struct attribute *amdgpu_dev_attributes[] = { 2921 &dev_attr_product_name.attr, 2922 &dev_attr_product_number.attr, 2923 &dev_attr_serial_number.attr, 2924 &dev_attr_pcie_replay_count.attr, 2925 NULL 2926 }; 2927 2928 /** 2929 * amdgpu_device_init - initialize the driver 2930 * 2931 * @adev: amdgpu_device pointer 2932 * @ddev: drm dev pointer 2933 * @pdev: pci dev pointer 2934 * @flags: driver flags 2935 * 2936 * Initializes the driver info and hw (all asics). 2937 * Returns 0 for success or an error on failure. 2938 * Called at driver startup. 
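 *
 * Roughly in order, the body below maps the register BARs, runs the IP
 * early_init pass, posts the card and parses the vbios if needed, brings
 * up the fence driver and mode config, runs amdgpu_device_ip_init and
 * finally amdgpu_device_ip_late_init before exposing the sysfs attributes.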
2939 */ 2940 int amdgpu_device_init(struct amdgpu_device *adev, 2941 struct drm_device *ddev, 2942 struct pci_dev *pdev, 2943 uint32_t flags) 2944 { 2945 int r, i; 2946 bool boco = false; 2947 u32 max_MBps; 2948 2949 adev->shutdown = false; 2950 adev->dev = &pdev->dev; 2951 adev->ddev = ddev; 2952 adev->pdev = pdev; 2953 adev->flags = flags; 2954 2955 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 2956 adev->asic_type = amdgpu_force_asic_type; 2957 else 2958 adev->asic_type = flags & AMD_ASIC_MASK; 2959 2960 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2961 if (amdgpu_emu_mode == 1) 2962 adev->usec_timeout *= 10; 2963 adev->gmc.gart_size = 512 * 1024 * 1024; 2964 adev->accel_working = false; 2965 adev->num_rings = 0; 2966 adev->mman.buffer_funcs = NULL; 2967 adev->mman.buffer_funcs_ring = NULL; 2968 adev->vm_manager.vm_pte_funcs = NULL; 2969 adev->vm_manager.vm_pte_num_scheds = 0; 2970 adev->gmc.gmc_funcs = NULL; 2971 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2972 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2973 2974 adev->smc_rreg = &amdgpu_invalid_rreg; 2975 adev->smc_wreg = &amdgpu_invalid_wreg; 2976 adev->pcie_rreg = &amdgpu_invalid_rreg; 2977 adev->pcie_wreg = &amdgpu_invalid_wreg; 2978 adev->pciep_rreg = &amdgpu_invalid_rreg; 2979 adev->pciep_wreg = &amdgpu_invalid_wreg; 2980 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 2981 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 2982 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2983 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2984 adev->didt_rreg = &amdgpu_invalid_rreg; 2985 adev->didt_wreg = &amdgpu_invalid_wreg; 2986 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2987 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2988 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2989 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2990 2991 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2992 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2993 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2994 2995 /* mutex initialization are all done here so we 2996 * can recall function without having locking issues */ 2997 atomic_set(&adev->irq.ih.lock, 0); 2998 mutex_init(&adev->firmware.mutex); 2999 mutex_init(&adev->pm.mutex); 3000 mutex_init(&adev->gfx.gpu_clock_mutex); 3001 mutex_init(&adev->srbm_mutex); 3002 mutex_init(&adev->gfx.pipe_reserve_mutex); 3003 mutex_init(&adev->gfx.gfx_off_mutex); 3004 mutex_init(&adev->grbm_idx_mutex); 3005 mutex_init(&adev->mn_lock); 3006 mutex_init(&adev->virt.vf_errors.lock); 3007 hash_init(adev->mn_hash); 3008 mutex_init(&adev->lock_reset); 3009 mutex_init(&adev->psp.mutex); 3010 mutex_init(&adev->notifier_lock); 3011 3012 r = amdgpu_device_check_arguments(adev); 3013 if (r) 3014 return r; 3015 3016 spin_lock_init(&adev->mmio_idx_lock); 3017 spin_lock_init(&adev->smc_idx_lock); 3018 spin_lock_init(&adev->pcie_idx_lock); 3019 spin_lock_init(&adev->uvd_ctx_idx_lock); 3020 spin_lock_init(&adev->didt_idx_lock); 3021 spin_lock_init(&adev->gc_cac_idx_lock); 3022 spin_lock_init(&adev->se_cac_idx_lock); 3023 spin_lock_init(&adev->audio_endpt_idx_lock); 3024 spin_lock_init(&adev->mm_stats.lock); 3025 3026 INIT_LIST_HEAD(&adev->shadow_list); 3027 mutex_init(&adev->shadow_list_lock); 3028 3029 INIT_DELAYED_WORK(&adev->delayed_init_work, 3030 amdgpu_device_delayed_init_work_handler); 3031 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3032 amdgpu_device_delay_enable_gfx_off); 3033 3034 
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3035 3036 adev->gfx.gfx_off_req_count = 1; 3037 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3038 3039 /* Registers mapping */ 3040 /* TODO: block userspace mapping of io register */ 3041 if (adev->asic_type >= CHIP_BONAIRE) { 3042 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3043 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3044 } else { 3045 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3046 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3047 } 3048 3049 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3050 if (adev->rmmio == NULL) { 3051 return -ENOMEM; 3052 } 3053 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3054 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3055 3056 /* io port mapping */ 3057 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 3058 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 3059 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 3060 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 3061 break; 3062 } 3063 } 3064 if (adev->rio_mem == NULL) 3065 DRM_INFO("PCI I/O BAR is not found.\n"); 3066 3067 /* enable PCIE atomic ops */ 3068 r = pci_enable_atomic_ops_to_root(adev->pdev, 3069 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3070 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3071 if (r) { 3072 adev->have_atomics_support = false; 3073 DRM_INFO("PCIE atomic ops is not supported\n"); 3074 } else { 3075 adev->have_atomics_support = true; 3076 } 3077 3078 amdgpu_device_get_pcie_info(adev); 3079 3080 if (amdgpu_mcbp) 3081 DRM_INFO("MCBP is enabled\n"); 3082 3083 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) 3084 adev->enable_mes = true; 3085 3086 /* detect hw virtualization here */ 3087 amdgpu_detect_virtualization(adev); 3088 3089 r = amdgpu_device_get_job_timeout_settings(adev); 3090 if (r) { 3091 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3092 return r; 3093 } 3094 3095 /* early init functions */ 3096 r = amdgpu_device_ip_early_init(adev); 3097 if (r) 3098 return r; 3099 3100 /* doorbell bar mapping and doorbell index init*/ 3101 amdgpu_device_doorbell_init(adev); 3102 3103 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3104 /* this will fail for cards that aren't VGA class devices, just 3105 * ignore it */ 3106 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 3107 3108 if (amdgpu_device_supports_boco(ddev)) 3109 boco = true; 3110 if (amdgpu_has_atpx() && 3111 (amdgpu_is_atpx_hybrid() || 3112 amdgpu_has_atpx_dgpu_power_cntl()) && 3113 !pci_is_thunderbolt_attached(adev->pdev)) 3114 vga_switcheroo_register_client(adev->pdev, 3115 &amdgpu_switcheroo_ops, boco); 3116 if (boco) 3117 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3118 3119 if (amdgpu_emu_mode == 1) { 3120 /* post the asic on emulation mode */ 3121 emu_soc_asic_init(adev); 3122 goto fence_driver_init; 3123 } 3124 3125 /* detect if we are with an SRIOV vbios */ 3126 amdgpu_device_detect_sriov_bios(adev); 3127 3128 /* check if we need to reset the asic 3129 * E.g., driver was not cleanly unloaded previously, etc. 
3130 */ 3131 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3132 r = amdgpu_asic_reset(adev); 3133 if (r) { 3134 dev_err(adev->dev, "asic reset on init failed\n"); 3135 goto failed; 3136 } 3137 } 3138 3139 /* Post card if necessary */ 3140 if (amdgpu_device_need_post(adev)) { 3141 if (!adev->bios) { 3142 dev_err(adev->dev, "no vBIOS found\n"); 3143 r = -EINVAL; 3144 goto failed; 3145 } 3146 DRM_INFO("GPU posting now...\n"); 3147 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3148 if (r) { 3149 dev_err(adev->dev, "gpu post error!\n"); 3150 goto failed; 3151 } 3152 } 3153 3154 if (adev->is_atom_fw) { 3155 /* Initialize clocks */ 3156 r = amdgpu_atomfirmware_get_clock_info(adev); 3157 if (r) { 3158 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3159 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3160 goto failed; 3161 } 3162 } else { 3163 /* Initialize clocks */ 3164 r = amdgpu_atombios_get_clock_info(adev); 3165 if (r) { 3166 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3167 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3168 goto failed; 3169 } 3170 /* init i2c buses */ 3171 if (!amdgpu_device_has_dc_support(adev)) 3172 amdgpu_atombios_i2c_init(adev); 3173 } 3174 3175 fence_driver_init: 3176 /* Fence driver */ 3177 r = amdgpu_fence_driver_init(adev); 3178 if (r) { 3179 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 3180 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3181 goto failed; 3182 } 3183 3184 /* init the mode config */ 3185 drm_mode_config_init(adev->ddev); 3186 3187 r = amdgpu_device_ip_init(adev); 3188 if (r) { 3189 /* failed in exclusive mode due to timeout */ 3190 if (amdgpu_sriov_vf(adev) && 3191 !amdgpu_sriov_runtime(adev) && 3192 amdgpu_virt_mmio_blocked(adev) && 3193 !amdgpu_virt_wait_reset(adev)) { 3194 dev_err(adev->dev, "VF exclusive mode timeout\n"); 3195 /* Don't send request since VF is inactive. */ 3196 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 3197 adev->virt.ops = NULL; 3198 r = -EAGAIN; 3199 goto failed; 3200 } 3201 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3203 goto failed; 3204 } 3205 3206 dev_info(adev->dev, 3207 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3208 adev->gfx.config.max_shader_engines, 3209 adev->gfx.config.max_sh_per_se, 3210 adev->gfx.config.max_cu_per_sh, 3211 adev->gfx.cu_info.number); 3212 3213 adev->accel_working = true; 3214 3215 amdgpu_vm_check_compute_bug(adev); 3216 3217 /* Initialize the buffer migration limit. */ 3218 if (amdgpu_moverate >= 0) 3219 max_MBps = amdgpu_moverate; 3220 else 3221 max_MBps = 8; /* Allow 8 MB/s. */ 3222 /* Get a log2 for easy divisions. 
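 For example, the default of 8 MB/s gives log2_max_MBps = ilog2(8) = 3.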
*/ 3223 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3224 3225 amdgpu_fbdev_init(adev); 3226 3227 r = amdgpu_pm_sysfs_init(adev); 3228 if (r) { 3229 adev->pm_sysfs_en = false; 3230 DRM_ERROR("registering pm sysfs failed (%d).\n", r); 3231 } else 3232 adev->pm_sysfs_en = true; 3233 3234 r = amdgpu_ucode_sysfs_init(adev); 3235 if (r) { 3236 adev->ucode_sysfs_en = false; 3237 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3238 } else 3239 adev->ucode_sysfs_en = true; 3240 3241 if ((amdgpu_testing & 1)) { 3242 if (adev->accel_working) 3243 amdgpu_test_moves(adev); 3244 else 3245 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 3246 } 3247 if (amdgpu_benchmarking) { 3248 if (adev->accel_working) 3249 amdgpu_benchmark(adev, amdgpu_benchmarking); 3250 else 3251 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 3252 } 3253 3254 /* 3255 * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3256 * Otherwise the mgpu fan boost feature would be skipped because this 3257 * gpu instance would not have been counted yet. 3258 */ 3259 amdgpu_register_gpu_instance(adev); 3260 3261 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3262 * explicit gating rather than handling it automatically. 3263 */ 3264 r = amdgpu_device_ip_late_init(adev); 3265 if (r) { 3266 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3267 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3268 goto failed; 3269 } 3270 3271 /* must succeed. */ 3272 amdgpu_ras_resume(adev); 3273 3274 queue_delayed_work(system_wq, &adev->delayed_init_work, 3275 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3276 3277 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 3278 if (r) { 3279 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 3280 return r; 3281 } 3282 3283 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3284 r = amdgpu_pmu_init(adev); 3285 if (r) 3286 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 3287 3288 return 0; 3289 3290 failed: 3291 amdgpu_vf_error_trans_all(adev); 3292 if (boco) 3293 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3294 3295 return r; 3296 } 3297 3298 /** 3299 * amdgpu_device_fini - tear down the driver 3300 * 3301 * @adev: amdgpu_device pointer 3302 * 3303 * Tear down the driver info (all asics). 3304 * Called at driver shutdown.
3305 */ 3306 void amdgpu_device_fini(struct amdgpu_device *adev) 3307 { 3308 int r; 3309 3310 DRM_INFO("amdgpu: finishing device.\n"); 3311 flush_delayed_work(&adev->delayed_init_work); 3312 adev->shutdown = true; 3313 3314 /* make sure IB test finished before entering exclusive mode 3315 * to avoid preemption on IB test 3316 * */ 3317 if (amdgpu_sriov_vf(adev)) 3318 amdgpu_virt_request_full_gpu(adev, false); 3319 3320 /* disable all interrupts */ 3321 amdgpu_irq_disable_all(adev); 3322 if (adev->mode_info.mode_config_initialized){ 3323 if (!amdgpu_device_has_dc_support(adev)) 3324 drm_helper_force_disable_all(adev->ddev); 3325 else 3326 drm_atomic_helper_shutdown(adev->ddev); 3327 } 3328 amdgpu_fence_driver_fini(adev); 3329 if (adev->pm_sysfs_en) 3330 amdgpu_pm_sysfs_fini(adev); 3331 amdgpu_fbdev_fini(adev); 3332 r = amdgpu_device_ip_fini(adev); 3333 if (adev->firmware.gpu_info_fw) { 3334 release_firmware(adev->firmware.gpu_info_fw); 3335 adev->firmware.gpu_info_fw = NULL; 3336 } 3337 adev->accel_working = false; 3338 /* free i2c buses */ 3339 if (!amdgpu_device_has_dc_support(adev)) 3340 amdgpu_i2c_fini(adev); 3341 3342 if (amdgpu_emu_mode != 1) 3343 amdgpu_atombios_fini(adev); 3344 3345 kfree(adev->bios); 3346 adev->bios = NULL; 3347 if (amdgpu_has_atpx() && 3348 (amdgpu_is_atpx_hybrid() || 3349 amdgpu_has_atpx_dgpu_power_cntl()) && 3350 !pci_is_thunderbolt_attached(adev->pdev)) 3351 vga_switcheroo_unregister_client(adev->pdev); 3352 if (amdgpu_device_supports_boco(adev->ddev)) 3353 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3354 vga_client_register(adev->pdev, NULL, NULL, NULL); 3355 if (adev->rio_mem) 3356 pci_iounmap(adev->pdev, adev->rio_mem); 3357 adev->rio_mem = NULL; 3358 iounmap(adev->rmmio); 3359 adev->rmmio = NULL; 3360 amdgpu_device_doorbell_fini(adev); 3361 3362 if (adev->ucode_sysfs_en) 3363 amdgpu_ucode_sysfs_fini(adev); 3364 3365 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 3366 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3367 amdgpu_pmu_fini(adev); 3368 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) 3369 amdgpu_discovery_fini(adev); 3370 } 3371 3372 3373 /* 3374 * Suspend & resume. 3375 */ 3376 /** 3377 * amdgpu_device_suspend - initiate device suspend 3378 * 3379 * @dev: drm dev pointer 3380 * @suspend: suspend state 3381 * @fbcon : notify the fbdev of suspend 3382 * 3383 * Puts the hw in the suspend state (all asics). 3384 * Returns 0 for success or an error on failure. 3385 * Called at driver suspend. 
3386 */ 3387 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 3388 { 3389 struct amdgpu_device *adev; 3390 struct drm_crtc *crtc; 3391 struct drm_connector *connector; 3392 struct drm_connector_list_iter iter; 3393 int r; 3394 3395 if (dev == NULL || dev->dev_private == NULL) { 3396 return -ENODEV; 3397 } 3398 3399 adev = dev->dev_private; 3400 3401 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3402 return 0; 3403 3404 adev->in_suspend = true; 3405 drm_kms_helper_poll_disable(dev); 3406 3407 if (fbcon) 3408 amdgpu_fbdev_set_suspend(adev, 1); 3409 3410 cancel_delayed_work_sync(&adev->delayed_init_work); 3411 3412 if (!amdgpu_device_has_dc_support(adev)) { 3413 /* turn off display hw */ 3414 drm_modeset_lock_all(dev); 3415 drm_connector_list_iter_begin(dev, &iter); 3416 drm_for_each_connector_iter(connector, &iter) 3417 drm_helper_connector_dpms(connector, 3418 DRM_MODE_DPMS_OFF); 3419 drm_connector_list_iter_end(&iter); 3420 drm_modeset_unlock_all(dev); 3421 /* unpin the front buffers and cursors */ 3422 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3423 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3424 struct drm_framebuffer *fb = crtc->primary->fb; 3425 struct amdgpu_bo *robj; 3426 3427 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3428 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3429 r = amdgpu_bo_reserve(aobj, true); 3430 if (r == 0) { 3431 amdgpu_bo_unpin(aobj); 3432 amdgpu_bo_unreserve(aobj); 3433 } 3434 } 3435 3436 if (fb == NULL || fb->obj[0] == NULL) { 3437 continue; 3438 } 3439 robj = gem_to_amdgpu_bo(fb->obj[0]); 3440 /* don't unpin kernel fb objects */ 3441 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 3442 r = amdgpu_bo_reserve(robj, true); 3443 if (r == 0) { 3444 amdgpu_bo_unpin(robj); 3445 amdgpu_bo_unreserve(robj); 3446 } 3447 } 3448 } 3449 } 3450 3451 amdgpu_ras_suspend(adev); 3452 3453 r = amdgpu_device_ip_suspend_phase1(adev); 3454 3455 amdgpu_amdkfd_suspend(adev, !fbcon); 3456 3457 /* evict vram memory */ 3458 amdgpu_bo_evict_vram(adev); 3459 3460 amdgpu_fence_driver_suspend(adev); 3461 3462 r = amdgpu_device_ip_suspend_phase2(adev); 3463 3464 /* evict remaining vram memory 3465 * This second call to evict vram is to evict the gart page table 3466 * using the CPU. 3467 */ 3468 amdgpu_bo_evict_vram(adev); 3469 3470 return 0; 3471 } 3472 3473 /** 3474 * amdgpu_device_resume - initiate device resume 3475 * 3476 * @dev: drm dev pointer 3477 * @resume: resume state 3478 * @fbcon : notify the fbdev of resume 3479 * 3480 * Bring the hw back to operating state (all asics). 3481 * Returns 0 for success or an error on failure. 3482 * Called at driver resume. 
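 *
 * The body below re-posts the card if required, resumes the hardware IPs
 * and the fence driver, reruns late init, re-pins the cursor BOs, resumes
 * KFD and RAS, and finally restores the display state and fires a hotplug
 * event.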
3483 */ 3484 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 3485 { 3486 struct drm_connector *connector; 3487 struct drm_connector_list_iter iter; 3488 struct amdgpu_device *adev = dev->dev_private; 3489 struct drm_crtc *crtc; 3490 int r = 0; 3491 3492 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3493 return 0; 3494 3495 /* post card */ 3496 if (amdgpu_device_need_post(adev)) { 3497 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3498 if (r) 3499 DRM_ERROR("amdgpu asic init failed\n"); 3500 } 3501 3502 r = amdgpu_device_ip_resume(adev); 3503 if (r) { 3504 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 3505 return r; 3506 } 3507 amdgpu_fence_driver_resume(adev); 3508 3509 3510 r = amdgpu_device_ip_late_init(adev); 3511 if (r) 3512 return r; 3513 3514 queue_delayed_work(system_wq, &adev->delayed_init_work, 3515 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3516 3517 if (!amdgpu_device_has_dc_support(adev)) { 3518 /* pin cursors */ 3519 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3520 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3521 3522 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3523 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3524 r = amdgpu_bo_reserve(aobj, true); 3525 if (r == 0) { 3526 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 3527 if (r != 0) 3528 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 3529 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 3530 amdgpu_bo_unreserve(aobj); 3531 } 3532 } 3533 } 3534 } 3535 r = amdgpu_amdkfd_resume(adev, !fbcon); 3536 if (r) 3537 return r; 3538 3539 /* Make sure IB tests flushed */ 3540 flush_delayed_work(&adev->delayed_init_work); 3541 3542 /* blat the mode back in */ 3543 if (fbcon) { 3544 if (!amdgpu_device_has_dc_support(adev)) { 3545 /* pre DCE11 */ 3546 drm_helper_resume_force_mode(dev); 3547 3548 /* turn on display hw */ 3549 drm_modeset_lock_all(dev); 3550 3551 drm_connector_list_iter_begin(dev, &iter); 3552 drm_for_each_connector_iter(connector, &iter) 3553 drm_helper_connector_dpms(connector, 3554 DRM_MODE_DPMS_ON); 3555 drm_connector_list_iter_end(&iter); 3556 3557 drm_modeset_unlock_all(dev); 3558 } 3559 amdgpu_fbdev_set_suspend(adev, 0); 3560 } 3561 3562 drm_kms_helper_poll_enable(dev); 3563 3564 amdgpu_ras_resume(adev); 3565 3566 /* 3567 * Most of the connector probing functions try to acquire runtime pm 3568 * refs to ensure that the GPU is powered on when connector polling is 3569 * performed. Since we're calling this from a runtime PM callback, 3570 * trying to acquire rpm refs will cause us to deadlock. 3571 * 3572 * Since we're guaranteed to be holding the rpm lock, it's safe to 3573 * temporarily disable the rpm helpers so this doesn't deadlock us. 3574 */ 3575 #ifdef CONFIG_PM 3576 dev->dev->power.disable_depth++; 3577 #endif 3578 if (!amdgpu_device_has_dc_support(adev)) 3579 drm_helper_hpd_irq_event(dev); 3580 else 3581 drm_kms_helper_hotplug_event(dev); 3582 #ifdef CONFIG_PM 3583 dev->dev->power.disable_depth--; 3584 #endif 3585 adev->in_suspend = false; 3586 3587 return 0; 3588 } 3589 3590 /** 3591 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 3592 * 3593 * @adev: amdgpu_device pointer 3594 * 3595 * The list of all the hardware IPs that make up the asic is walked and 3596 * the check_soft_reset callbacks are run. check_soft_reset determines 3597 * if the asic is still hung or not. 3598 * Returns true if any of the IPs are still in a hung state, false if not. 
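 *
 * Note that the first two checks in the body make this always report a
 * hang for SR-IOV VFs and for ASICs that report needing a full reset
 * (amdgpu_asic_need_full_reset).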
3599 */ 3600 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 3601 { 3602 int i; 3603 bool asic_hang = false; 3604 3605 if (amdgpu_sriov_vf(adev)) 3606 return true; 3607 3608 if (amdgpu_asic_need_full_reset(adev)) 3609 return true; 3610 3611 for (i = 0; i < adev->num_ip_blocks; i++) { 3612 if (!adev->ip_blocks[i].status.valid) 3613 continue; 3614 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 3615 adev->ip_blocks[i].status.hang = 3616 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 3617 if (adev->ip_blocks[i].status.hang) { 3618 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 3619 asic_hang = true; 3620 } 3621 } 3622 return asic_hang; 3623 } 3624 3625 /** 3626 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 3627 * 3628 * @adev: amdgpu_device pointer 3629 * 3630 * The list of all the hardware IPs that make up the asic is walked and the 3631 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 3632 * handles any IP specific hardware or software state changes that are 3633 * necessary for a soft reset to succeed. 3634 * Returns 0 on success, negative error code on failure. 3635 */ 3636 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 3637 { 3638 int i, r = 0; 3639 3640 for (i = 0; i < adev->num_ip_blocks; i++) { 3641 if (!adev->ip_blocks[i].status.valid) 3642 continue; 3643 if (adev->ip_blocks[i].status.hang && 3644 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 3645 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 3646 if (r) 3647 return r; 3648 } 3649 } 3650 3651 return 0; 3652 } 3653 3654 /** 3655 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3656 * 3657 * @adev: amdgpu_device pointer 3658 * 3659 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3660 * reset is necessary to recover. 3661 * Returns true if a full asic reset is required, false if not. 3662 */ 3663 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3664 { 3665 int i; 3666 3667 if (amdgpu_asic_need_full_reset(adev)) 3668 return true; 3669 3670 for (i = 0; i < adev->num_ip_blocks; i++) { 3671 if (!adev->ip_blocks[i].status.valid) 3672 continue; 3673 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3674 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3675 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3676 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3677 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3678 if (adev->ip_blocks[i].status.hang) { 3679 DRM_INFO("Some block need full reset!\n"); 3680 return true; 3681 } 3682 } 3683 } 3684 return false; 3685 } 3686 3687 /** 3688 * amdgpu_device_ip_soft_reset - do a soft reset 3689 * 3690 * @adev: amdgpu_device pointer 3691 * 3692 * The list of all the hardware IPs that make up the asic is walked and the 3693 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3694 * IP specific hardware or software state changes that are necessary to soft 3695 * reset the IP. 3696 * Returns 0 on success, negative error code on failure. 
3697 */ 3698 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3699 { 3700 int i, r = 0; 3701 3702 for (i = 0; i < adev->num_ip_blocks; i++) { 3703 if (!adev->ip_blocks[i].status.valid) 3704 continue; 3705 if (adev->ip_blocks[i].status.hang && 3706 adev->ip_blocks[i].version->funcs->soft_reset) { 3707 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3708 if (r) 3709 return r; 3710 } 3711 } 3712 3713 return 0; 3714 } 3715 3716 /** 3717 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3718 * 3719 * @adev: amdgpu_device pointer 3720 * 3721 * The list of all the hardware IPs that make up the asic is walked and the 3722 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3723 * handles any IP specific hardware or software state changes that are 3724 * necessary after the IP has been soft reset. 3725 * Returns 0 on success, negative error code on failure. 3726 */ 3727 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3728 { 3729 int i, r = 0; 3730 3731 for (i = 0; i < adev->num_ip_blocks; i++) { 3732 if (!adev->ip_blocks[i].status.valid) 3733 continue; 3734 if (adev->ip_blocks[i].status.hang && 3735 adev->ip_blocks[i].version->funcs->post_soft_reset) 3736 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3737 if (r) 3738 return r; 3739 } 3740 3741 return 0; 3742 } 3743 3744 /** 3745 * amdgpu_device_recover_vram - Recover some VRAM contents 3746 * 3747 * @adev: amdgpu_device pointer 3748 * 3749 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3750 * restore things like GPUVM page tables after a GPU reset where 3751 * the contents of VRAM might be lost. 3752 * 3753 * Returns: 3754 * 0 on success, negative error code on failure. 3755 */ 3756 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3757 { 3758 struct dma_fence *fence = NULL, *next = NULL; 3759 struct amdgpu_bo *shadow; 3760 long r = 1, tmo; 3761 3762 if (amdgpu_sriov_runtime(adev)) 3763 tmo = msecs_to_jiffies(8000); 3764 else 3765 tmo = msecs_to_jiffies(100); 3766 3767 DRM_INFO("recover vram bo from shadow start\n"); 3768 mutex_lock(&adev->shadow_list_lock); 3769 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3770 3771 /* No need to recover an evicted BO */ 3772 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3773 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || 3774 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3775 continue; 3776 3777 r = amdgpu_bo_restore_shadow(shadow, &next); 3778 if (r) 3779 break; 3780 3781 if (fence) { 3782 tmo = dma_fence_wait_timeout(fence, false, tmo); 3783 dma_fence_put(fence); 3784 fence = next; 3785 if (tmo == 0) { 3786 r = -ETIMEDOUT; 3787 break; 3788 } else if (tmo < 0) { 3789 r = tmo; 3790 break; 3791 } 3792 } else { 3793 fence = next; 3794 } 3795 } 3796 mutex_unlock(&adev->shadow_list_lock); 3797 3798 if (fence) 3799 tmo = dma_fence_wait_timeout(fence, false, tmo); 3800 dma_fence_put(fence); 3801 3802 if (r < 0 || tmo <= 0) { 3803 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 3804 return -EIO; 3805 } 3806 3807 DRM_INFO("recover vram bo from shadow done\n"); 3808 return 0; 3809 } 3810 3811 3812 /** 3813 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3814 * 3815 * @adev: amdgpu device pointer 3816 * @from_hypervisor: request from hypervisor 3817 * 3818 * do VF FLR and reinitialize Asic 3819 * return 0 means succeeded otherwise failed 3820 */ 3821 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 
3822 bool from_hypervisor) 3823 { 3824 int r; 3825 3826 if (from_hypervisor) 3827 r = amdgpu_virt_request_full_gpu(adev, true); 3828 else 3829 r = amdgpu_virt_reset_gpu(adev); 3830 if (r) 3831 return r; 3832 3833 amdgpu_amdkfd_pre_reset(adev); 3834 3835 /* Resume IP prior to SMC */ 3836 r = amdgpu_device_ip_reinit_early_sriov(adev); 3837 if (r) 3838 goto error; 3839 3840 amdgpu_virt_init_data_exchange(adev); 3841 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3842 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3843 3844 r = amdgpu_device_fw_loading(adev); 3845 if (r) 3846 return r; 3847 3848 /* now we are okay to resume SMC/CP/SDMA */ 3849 r = amdgpu_device_ip_reinit_late_sriov(adev); 3850 if (r) 3851 goto error; 3852 3853 amdgpu_irq_gpu_reset_resume_helper(adev); 3854 r = amdgpu_ib_ring_tests(adev); 3855 amdgpu_amdkfd_post_reset(adev); 3856 3857 error: 3858 amdgpu_virt_release_full_gpu(adev, true); 3859 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3860 amdgpu_inc_vram_lost(adev); 3861 r = amdgpu_device_recover_vram(adev); 3862 } 3863 3864 return r; 3865 } 3866 3867 /** 3868 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3869 * 3870 * @adev: amdgpu device pointer 3871 * 3872 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3873 * a hung GPU. 3874 */ 3875 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 3876 { 3877 if (!amdgpu_device_ip_check_soft_reset(adev)) { 3878 DRM_INFO("Timeout, but no hardware hang detected.\n"); 3879 return false; 3880 } 3881 3882 if (amdgpu_gpu_recovery == 0) 3883 goto disabled; 3884 3885 if (amdgpu_sriov_vf(adev)) 3886 return true; 3887 3888 if (amdgpu_gpu_recovery == -1) { 3889 switch (adev->asic_type) { 3890 case CHIP_BONAIRE: 3891 case CHIP_HAWAII: 3892 case CHIP_TOPAZ: 3893 case CHIP_TONGA: 3894 case CHIP_FIJI: 3895 case CHIP_POLARIS10: 3896 case CHIP_POLARIS11: 3897 case CHIP_POLARIS12: 3898 case CHIP_VEGAM: 3899 case CHIP_VEGA20: 3900 case CHIP_VEGA10: 3901 case CHIP_VEGA12: 3902 case CHIP_RAVEN: 3903 case CHIP_ARCTURUS: 3904 case CHIP_RENOIR: 3905 case CHIP_NAVI10: 3906 case CHIP_NAVI14: 3907 case CHIP_NAVI12: 3908 break; 3909 default: 3910 goto disabled; 3911 } 3912 } 3913 3914 return true; 3915 3916 disabled: 3917 DRM_INFO("GPU recovery disabled.\n"); 3918 return false; 3919 } 3920 3921 3922 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 3923 struct amdgpu_job *job, 3924 bool *need_full_reset_arg) 3925 { 3926 int i, r = 0; 3927 bool need_full_reset = *need_full_reset_arg; 3928 3929 amdgpu_debugfs_wait_dump(adev); 3930 3931 /* block all schedulers and reset given job's ring */ 3932 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3933 struct amdgpu_ring *ring = adev->rings[i]; 3934 3935 if (!ring || !ring->sched.thread) 3936 continue; 3937 3938 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3939 amdgpu_fence_driver_force_completion(ring); 3940 } 3941 3942 if(job) 3943 drm_sched_increase_karma(&job->base); 3944 3945 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 3946 if (!amdgpu_sriov_vf(adev)) { 3947 3948 if (!need_full_reset) 3949 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3950 3951 if (!need_full_reset) { 3952 amdgpu_device_ip_pre_soft_reset(adev); 3953 r = amdgpu_device_ip_soft_reset(adev); 3954 amdgpu_device_ip_post_soft_reset(adev); 3955 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3956 DRM_INFO("soft reset failed, will fallback to full 
reset!\n"); 3957 need_full_reset = true; 3958 } 3959 } 3960 3961 if (need_full_reset) 3962 r = amdgpu_device_ip_suspend(adev); 3963 3964 *need_full_reset_arg = need_full_reset; 3965 } 3966 3967 return r; 3968 } 3969 3970 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, 3971 struct list_head *device_list_handle, 3972 bool *need_full_reset_arg) 3973 { 3974 struct amdgpu_device *tmp_adev = NULL; 3975 bool need_full_reset = *need_full_reset_arg, vram_lost = false; 3976 int r = 0; 3977 3978 /* 3979 * ASIC reset has to be done on all XGMI hive nodes ASAP 3980 * to allow proper link negotiation in FW (within 1 sec) 3981 */ 3982 if (need_full_reset) { 3983 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3984 /* For XGMI run all resets in parallel to speed up the process */ 3985 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 3986 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 3987 r = -EALREADY; 3988 } else 3989 r = amdgpu_asic_reset(tmp_adev); 3990 3991 if (r) { 3992 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", 3993 r, tmp_adev->ddev->unique); 3994 break; 3995 } 3996 } 3997 3998 /* For XGMI wait for all resets to complete before proceeding */ 3999 if (!r) { 4000 list_for_each_entry(tmp_adev, device_list_handle, 4001 gmc.xgmi.head) { 4002 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 4003 flush_work(&tmp_adev->xgmi_reset_work); 4004 r = tmp_adev->asic_reset_res; 4005 if (r) 4006 break; 4007 } 4008 } 4009 } 4010 } 4011 4012 if (!r && amdgpu_ras_intr_triggered()) { 4013 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4014 if (tmp_adev->mmhub.funcs && 4015 tmp_adev->mmhub.funcs->reset_ras_error_count) 4016 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev); 4017 } 4018 4019 amdgpu_ras_intr_cleared(); 4020 } 4021 4022 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4023 if (need_full_reset) { 4024 /* post card */ 4025 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) 4026 DRM_WARN("asic atom init failed!"); 4027 4028 if (!r) { 4029 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 4030 r = amdgpu_device_ip_resume_phase1(tmp_adev); 4031 if (r) 4032 goto out; 4033 4034 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 4035 if (vram_lost) { 4036 DRM_INFO("VRAM is lost due to GPU reset!\n"); 4037 amdgpu_inc_vram_lost(tmp_adev); 4038 } 4039 4040 r = amdgpu_gtt_mgr_recover( 4041 &tmp_adev->mman.bdev.man[TTM_PL_TT]); 4042 if (r) 4043 goto out; 4044 4045 r = amdgpu_device_fw_loading(tmp_adev); 4046 if (r) 4047 return r; 4048 4049 r = amdgpu_device_ip_resume_phase2(tmp_adev); 4050 if (r) 4051 goto out; 4052 4053 if (vram_lost) 4054 amdgpu_device_fill_reset_magic(tmp_adev); 4055 4056 /* 4057 * Add this ASIC as tracked since the reset 4058 * completed successfully. 4059 */ 4060 amdgpu_register_gpu_instance(tmp_adev); 4061 4062 r = amdgpu_device_ip_late_init(tmp_adev); 4063 if (r) 4064 goto out; 4065 4066 amdgpu_fbdev_set_suspend(tmp_adev, 0); 4067 4068 /* must succeed.
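amdgpu_ras_resume() re-enables RAS features that may have been suspended via amdgpu_ras_suspend() earlier in amdgpu_device_gpu_recover().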
*/ 4069 amdgpu_ras_resume(tmp_adev); 4070 4071 /* Update PSP FW topology after reset */ 4072 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) 4073 r = amdgpu_xgmi_update_topology(hive, tmp_adev); 4074 } 4075 } 4076 4077 4078 out: 4079 if (!r) { 4080 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 4081 r = amdgpu_ib_ring_tests(tmp_adev); 4082 if (r) { 4083 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 4084 r = amdgpu_device_ip_suspend(tmp_adev); 4085 need_full_reset = true; 4086 r = -EAGAIN; 4087 goto end; 4088 } 4089 } 4090 4091 if (!r) 4092 r = amdgpu_device_recover_vram(tmp_adev); 4093 else 4094 tmp_adev->asic_reset_res = r; 4095 } 4096 4097 end: 4098 *need_full_reset_arg = need_full_reset; 4099 return r; 4100 } 4101 4102 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) 4103 { 4104 if (trylock) { 4105 if (!mutex_trylock(&adev->lock_reset)) 4106 return false; 4107 } else 4108 mutex_lock(&adev->lock_reset); 4109 4110 atomic_inc(&adev->gpu_reset_counter); 4111 adev->in_gpu_reset = true; 4112 switch (amdgpu_asic_reset_method(adev)) { 4113 case AMD_RESET_METHOD_MODE1: 4114 adev->mp1_state = PP_MP1_STATE_SHUTDOWN; 4115 break; 4116 case AMD_RESET_METHOD_MODE2: 4117 adev->mp1_state = PP_MP1_STATE_RESET; 4118 break; 4119 default: 4120 adev->mp1_state = PP_MP1_STATE_NONE; 4121 break; 4122 } 4123 4124 return true; 4125 } 4126 4127 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) 4128 { 4129 amdgpu_vf_error_trans_all(adev); 4130 adev->mp1_state = PP_MP1_STATE_NONE; 4131 adev->in_gpu_reset = false; 4132 mutex_unlock(&adev->lock_reset); 4133 } 4134 4135 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) 4136 { 4137 struct pci_dev *p = NULL; 4138 4139 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4140 adev->pdev->bus->number, 1); 4141 if (p) { 4142 pm_runtime_enable(&(p->dev)); 4143 pm_runtime_resume(&(p->dev)); 4144 } 4145 } 4146 4147 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 4148 { 4149 enum amd_reset_method reset_method; 4150 struct pci_dev *p = NULL; 4151 u64 expires; 4152 4153 /* 4154 * For now, only BACO and mode1 reset are confirmed 4155 * to suffer the audio issue without proper suspended. 4156 */ 4157 reset_method = amdgpu_asic_reset_method(adev); 4158 if ((reset_method != AMD_RESET_METHOD_BACO) && 4159 (reset_method != AMD_RESET_METHOD_MODE1)) 4160 return -EINVAL; 4161 4162 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4163 adev->pdev->bus->number, 1); 4164 if (!p) 4165 return -ENODEV; 4166 4167 expires = pm_runtime_autosuspend_expiration(&(p->dev)); 4168 if (!expires) 4169 /* 4170 * If we cannot get the audio device autosuspend delay, 4171 * a fixed 4S interval will be used. Considering 3S is 4172 * the audio controller default autosuspend delay setting. 4173 * 4S used here is guaranteed to cover that. 4174 */ 4175 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 4176 4177 while (!pm_runtime_status_suspended(&(p->dev))) { 4178 if (!pm_runtime_suspend(&(p->dev))) 4179 break; 4180 4181 if (expires < ktime_get_mono_fast_ns()) { 4182 dev_warn(adev->dev, "failed to suspend display audio\n"); 4183 /* TODO: abort the succeeding gpu reset? 
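For now the reset proceeds anyway; on this path the caller never sets audio_suspended, so the audio device is simply not resumed afterwards.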
*/ 4184 return -ETIMEDOUT; 4185 } 4186 } 4187 4188 pm_runtime_disable(&(p->dev)); 4189 4190 return 0; 4191 } 4192 4193 /** 4194 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 4195 * 4196 * @adev: amdgpu device pointer 4197 * @job: which job triggered the hang 4198 * 4199 * Attempt to reset the GPU if it has hung (all asics). 4200 * Attempt to do soft-reset or full-reset and reinitialize the ASIC. 4201 * Returns 0 for success or an error on failure. 4202 */ 4203 4204 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 4205 struct amdgpu_job *job) 4206 { 4207 struct list_head device_list, *device_list_handle = NULL; 4208 bool need_full_reset = false; 4209 bool job_signaled = false; 4210 struct amdgpu_hive_info *hive = NULL; 4211 struct amdgpu_device *tmp_adev = NULL; 4212 int i, r = 0; 4213 bool in_ras_intr = amdgpu_ras_intr_triggered(); 4214 bool use_baco = 4215 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? 4216 true : false; 4217 bool audio_suspended = false; 4218 4219 /* 4220 * Flush RAM to disk so that after reboot 4221 * the user can read the log and see why the system rebooted. 4222 */ 4223 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { 4224 4225 DRM_WARN("Emergency reboot."); 4226 4227 ksys_sync_helper(); 4228 emergency_restart(); 4229 } 4230 4231 dev_info(adev->dev, "GPU %s begin!\n", 4232 (in_ras_intr && !use_baco) ? "jobs stop":"reset"); 4233 4234 /* 4235 * Here we trylock to avoid a chain of resets executing while this 4236 * TO handler is running, whether triggered by jobs on different adevs 4237 * in an XGMI hive or by jobs on different schedulers for the same device. 4238 * We always reset all schedulers for a device and all devices in an 4239 * XGMI hive, so that should take care of them too. 4240 */ 4241 hive = amdgpu_get_xgmi_hive(adev, true); 4242 if (hive && !mutex_trylock(&hive->reset_lock)) { 4243 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", 4244 job ? job->base.id : -1, hive->hive_id); 4245 mutex_unlock(&hive->hive_lock); 4246 return 0; 4247 } 4248 4249 /* 4250 * Build list of devices to reset. 4251 * In case we are in XGMI hive mode, re-sort the device list 4252 * to put adev in the 1st position. 4253 */ 4254 INIT_LIST_HEAD(&device_list); 4255 if (adev->gmc.xgmi.num_physical_nodes > 1) { 4256 if (!hive) 4257 return -ENODEV; 4258 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list)) 4259 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list); 4260 device_list_handle = &hive->device_list; 4261 } else { 4262 list_add_tail(&adev->gmc.xgmi.head, &device_list); 4263 device_list_handle = &device_list; 4264 } 4265 4266 /* block all schedulers and reset given job's ring */ 4267 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4268 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) { 4269 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", 4270 job ? job->base.id : -1); 4271 mutex_unlock(&hive->hive_lock); 4272 return 0; 4273 } 4274 4275 /* 4276 * Try to put the audio codec into the suspend state 4277 * before the gpu reset starts. 4278 * 4279 * The power domain of the graphics device is shared 4280 * with the AZ power domain. Without this suspend, 4281 * we may change the audio hardware from behind 4282 * the audio driver's back, which will trigger 4283 * some audio codec errors.
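 *
 * Note: the audio function is looked up as PCI function 1 on the GPU's
 * own bus/slot (see amdgpu_device_suspend_display_audio() above), and it
 * is only resumed later if it was actually suspended here.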
4284 */ 4285 if (!amdgpu_device_suspend_display_audio(tmp_adev)) 4286 audio_suspended = true; 4287 4288 amdgpu_ras_set_error_query_ready(tmp_adev, false); 4289 4290 cancel_delayed_work_sync(&tmp_adev->delayed_init_work); 4291 4292 if (!amdgpu_sriov_vf(tmp_adev)) 4293 amdgpu_amdkfd_pre_reset(tmp_adev); 4294 4295 /* 4296 * Mark these ASICs to be reseted as untracked first 4297 * And add them back after reset completed 4298 */ 4299 amdgpu_unregister_gpu_instance(tmp_adev); 4300 4301 amdgpu_fbdev_set_suspend(tmp_adev, 1); 4302 4303 /* disable ras on ALL IPs */ 4304 if (!(in_ras_intr && !use_baco) && 4305 amdgpu_device_ip_need_full_reset(tmp_adev)) 4306 amdgpu_ras_suspend(tmp_adev); 4307 4308 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4309 struct amdgpu_ring *ring = tmp_adev->rings[i]; 4310 4311 if (!ring || !ring->sched.thread) 4312 continue; 4313 4314 drm_sched_stop(&ring->sched, job ? &job->base : NULL); 4315 4316 if (in_ras_intr && !use_baco) 4317 amdgpu_job_stop_all_jobs_on_sched(&ring->sched); 4318 } 4319 } 4320 4321 if (in_ras_intr && !use_baco) 4322 goto skip_sched_resume; 4323 4324 /* 4325 * Must check guilty signal here since after this point all old 4326 * HW fences are force signaled. 4327 * 4328 * job->base holds a reference to parent fence 4329 */ 4330 if (job && job->base.s_fence->parent && 4331 dma_fence_is_signaled(job->base.s_fence->parent)) { 4332 job_signaled = true; 4333 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 4334 goto skip_hw_reset; 4335 } 4336 4337 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 4338 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4339 r = amdgpu_device_pre_asic_reset(tmp_adev, 4340 NULL, 4341 &need_full_reset); 4342 /*TODO Should we stop ?*/ 4343 if (r) { 4344 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", 4345 r, tmp_adev->ddev->unique); 4346 tmp_adev->asic_reset_res = r; 4347 } 4348 } 4349 4350 /* Actual ASIC resets if needed.*/ 4351 /* TODO Implement XGMI hive reset logic for SRIOV */ 4352 if (amdgpu_sriov_vf(adev)) { 4353 r = amdgpu_device_reset_sriov(adev, job ? false : true); 4354 if (r) 4355 adev->asic_reset_res = r; 4356 } else { 4357 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); 4358 if (r && r == -EAGAIN) 4359 goto retry; 4360 } 4361 4362 skip_hw_reset: 4363 4364 /* Post ASIC reset for all devs .*/ 4365 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4366 4367 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4368 struct amdgpu_ring *ring = tmp_adev->rings[i]; 4369 4370 if (!ring || !ring->sched.thread) 4371 continue; 4372 4373 /* No point to resubmit jobs if we didn't HW reset*/ 4374 if (!tmp_adev->asic_reset_res && !job_signaled) 4375 drm_sched_resubmit_jobs(&ring->sched); 4376 4377 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); 4378 } 4379 4380 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { 4381 drm_helper_resume_force_mode(tmp_adev->ddev); 4382 } 4383 4384 tmp_adev->asic_reset_res = 0; 4385 4386 if (r) { 4387 /* bad news, how to tell it to userspace ? 
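For now the failure is only logged below and recorded via amdgpu_vf_error_put(), which is mainly relevant for SR-IOV VFs.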
*/ 4388 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); 4389 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 4390 } else { 4391 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 4392 } 4393 } 4394 4395 skip_sched_resume: 4396 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4397 /* unlock kfd: SRIOV would do it separately */ 4398 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) 4399 amdgpu_amdkfd_post_reset(tmp_adev); 4400 if (audio_suspended) 4401 amdgpu_device_resume_display_audio(tmp_adev); 4402 amdgpu_device_unlock_adev(tmp_adev); 4403 } 4404 4405 if (hive) { 4406 mutex_unlock(&hive->reset_lock); 4407 mutex_unlock(&hive->hive_lock); 4408 } 4409 4410 if (r) 4411 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 4412 return r; 4413 } 4414 4415 /** 4416 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot 4417 * 4418 * @adev: amdgpu_device pointer 4419 * 4420 * Fetches and stores in the driver the PCIE capabilities (gen speed 4421 * and lanes) of the slot the device is in. Handles APUs and 4422 * virtualized environments where PCIE config space may not be available. 4423 */ 4424 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 4425 { 4426 struct pci_dev *pdev; 4427 enum pci_bus_speed speed_cap, platform_speed_cap; 4428 enum pcie_link_width platform_link_width; 4429 4430 if (amdgpu_pcie_gen_cap) 4431 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 4432 4433 if (amdgpu_pcie_lane_cap) 4434 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 4435 4436 /* covers APUs as well */ 4437 if (pci_is_root_bus(adev->pdev->bus)) { 4438 if (adev->pm.pcie_gen_mask == 0) 4439 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 4440 if (adev->pm.pcie_mlw_mask == 0) 4441 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 4442 return; 4443 } 4444 4445 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 4446 return; 4447 4448 pcie_bandwidth_available(adev->pdev, NULL, 4449 &platform_speed_cap, &platform_link_width); 4450 4451 if (adev->pm.pcie_gen_mask == 0) { 4452 /* asic caps */ 4453 pdev = adev->pdev; 4454 speed_cap = pcie_get_speed_cap(pdev); 4455 if (speed_cap == PCI_SPEED_UNKNOWN) { 4456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4459 } else { 4460 if (speed_cap == PCIE_SPEED_16_0GT) 4461 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 4465 else if (speed_cap == PCIE_SPEED_8_0GT) 4466 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4469 else if (speed_cap == PCIE_SPEED_5_0GT) 4470 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4471 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 4472 else 4473 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 4474 } 4475 /* platform caps */ 4476 if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 4477 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4478 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4479 } else { 4480 if (platform_speed_cap == PCIE_SPEED_16_0GT) 4481 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4482 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4483
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 4485 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 4486 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4487 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 4489 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 4490 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4492 else 4493 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 4494 4495 } 4496 } 4497 if (adev->pm.pcie_mlw_mask == 0) { 4498 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 4499 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 4500 } else { 4501 switch (platform_link_width) { 4502 case PCIE_LNK_X32: 4503 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 4504 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4510 break; 4511 case PCIE_LNK_X16: 4512 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4518 break; 4519 case PCIE_LNK_X12: 4520 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4525 break; 4526 case PCIE_LNK_X8: 4527 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4531 break; 4532 case PCIE_LNK_X4: 4533 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4536 break; 4537 case PCIE_LNK_X2: 4538 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4540 break; 4541 case PCIE_LNK_X1: 4542 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 4543 break; 4544 default: 4545 break; 4546 } 4547 } 4548 } 4549 } 4550 4551 int amdgpu_device_baco_enter(struct drm_device *dev) 4552 { 4553 struct amdgpu_device *adev = dev->dev_private; 4554 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4555 4556 if (!amdgpu_device_supports_baco(adev->ddev)) 4557 return -ENOTSUPP; 4558 4559 if (ras && ras->supported) 4560 adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 4561 4562 return amdgpu_dpm_baco_enter(adev); 4563 } 4564 4565 int amdgpu_device_baco_exit(struct drm_device *dev) 4566 { 4567 struct amdgpu_device *adev = dev->dev_private; 4568 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4569 int ret = 0; 4570 4571 if (!amdgpu_device_supports_baco(adev->ddev)) 4572 return -ENOTSUPP; 4573 4574 ret = amdgpu_dpm_baco_exit(adev); 4575 if (ret) 4576 return ret; 4577 4578 if (ras && ras->supported) 4579 adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 4580 4581 return 0; 4582 } 4583
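/*
 * Illustrative pairing of the BACO helpers above (a sketch only; the real
 * callers, e.g. the runtime PM hooks in amdgpu_drv.c, are not reproduced
 * here):
 *
 *	r = amdgpu_device_baco_enter(drm_dev);
 *	if (r)
 *		return r;
 *
 *	(device remains in BACO while suspended)
 *
 *	r = amdgpu_device_baco_exit(drm_dev);
 */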