/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
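 * (BOCO is AMD's "Bus Off, Chip Off" power state, the mechanism used on
 * hybrid graphics / PowerXpress platforms to power the dGPU off completely
 * at runtime.)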
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;


#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	/* fall back to MM_INDEX/MM_DATA indirect access for the part of VRAM
	 * that is not CPU visible
	 */
	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
					     uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else
		adev->pcie_wreg(adev, (reg * 4), v);
}

/**
 * amdgpu_device_wreg - write to a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write a register either with MMIO or via the RLC
 * path if the register is in the RLC access range
 *
 * This function is only invoked for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	/* each entry in the array is a {reg offset, AND mask, OR mask} triplet */
	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells is increased by one page (0x400 dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
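 * Note that resizing is only attempted when the PCI root bus exposes a
 * 64-bit memory window above 4GB; otherwise the BAR is left untouched
 * (see the root bus resource check below).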
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64-bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or if posting is needed because a hw reset was performed.
 * Returns true if posting is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB, so we have 12 bits of offset, a minimum of
 * 9 bits in the page table, and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory
	 */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
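 * For example, loading with amdgpu.vm_size=256 requests a 256 GB GPU
 * virtual address space; values below 1 GB are rejected below and the
 * default is used instead.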
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block version is equal or greater,
 * 1 if it is smaller or the ip_block doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	if (adev->discovery_bin) {
		amdgpu_discovery_get_gfx_info(adev);
		return 0;
	}

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	case CHIP_SIENNA_CICHLID:
		chip_name = "sienna_cichlid";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
1608 "Failed to load gpu_info firmware \"%s\"\n", 1609 fw_name); 1610 goto out; 1611 } 1612 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); 1613 if (err) { 1614 dev_err(adev->dev, 1615 "Failed to validate gpu_info firmware \"%s\"\n", 1616 fw_name); 1617 goto out; 1618 } 1619 1620 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 1621 amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 1622 1623 switch (hdr->version_major) { 1624 case 1: 1625 { 1626 const struct gpu_info_firmware_v1_0 *gpu_info_fw = 1627 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 1628 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1629 1630 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 1631 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 1632 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 1633 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 1634 adev->gfx.config.max_texture_channel_caches = 1635 le32_to_cpu(gpu_info_fw->gc_num_tccs); 1636 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 1637 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 1638 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 1639 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 1640 adev->gfx.config.double_offchip_lds_buf = 1641 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 1642 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 1643 adev->gfx.cu_info.max_waves_per_simd = 1644 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 1645 adev->gfx.cu_info.max_scratch_slots_per_cu = 1646 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 1647 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 1648 if (hdr->version_minor >= 1) { 1649 const struct gpu_info_firmware_v1_1 *gpu_info_fw = 1650 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 1651 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1652 adev->gfx.config.num_sc_per_sh = 1653 le32_to_cpu(gpu_info_fw->num_sc_per_sh); 1654 adev->gfx.config.num_packer_per_sc = 1655 le32_to_cpu(gpu_info_fw->num_packer_per_sc); 1656 } 1657 1658 /* 1659 * soc bounding box info is not integrated in disocovery table, 1660 * we always need to parse it from gpu info firmware if needed. 1661 */ 1662 if (hdr->version_minor == 2) { 1663 const struct gpu_info_firmware_v1_2 *gpu_info_fw = 1664 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 1665 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1666 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 1667 } 1668 break; 1669 } 1670 default: 1671 dev_err(adev->dev, 1672 "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 1673 err = -EINVAL; 1674 goto out; 1675 } 1676 out: 1677 return err; 1678 } 1679 1680 /** 1681 * amdgpu_device_ip_early_init - run early init for hardware IPs 1682 * 1683 * @adev: amdgpu_device pointer 1684 * 1685 * Early initialization pass for hardware IPs. The hardware IPs that make 1686 * up each asic are discovered each IP's early_init callback is run. This 1687 * is the first stage in initializing the asic. 1688 * Returns 0 on success, negative error code on failure. 
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_KV;
		else
			adev->family = AMDGPU_FAMILY_CI;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
	case CHIP_SIENNA_CICHLID:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		/* handle vbios stuff prior full access mode for new handshake */
		if (adev->virt.req_init_data_ver == 1) {
			if (!amdgpu_get_bios(adev)) {
				DRM_ERROR("failed to get vbios\n");
				return -EINVAL;
			}

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	/* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
	 * will not be prepared by the host for this VF */
	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			r = amdgpu_device_parse_gpu_info_fw(adev);
			if (r)
				return r;

			/* skip vbios handling for new handshake */
			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
				continue;

			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (adev->in_gpu_reset || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
The list of all the hardware 1948 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 1949 * are run. sw_init initializes the software state associated with each IP 1950 * and hw_init initializes the hardware associated with each IP. 1951 * Returns 0 on success, negative error code on failure. 1952 */ 1953 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 1954 { 1955 int i, r; 1956 1957 r = amdgpu_ras_init(adev); 1958 if (r) 1959 return r; 1960 1961 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { 1962 r = amdgpu_virt_request_full_gpu(adev, true); 1963 if (r) 1964 return -EAGAIN; 1965 } 1966 1967 for (i = 0; i < adev->num_ip_blocks; i++) { 1968 if (!adev->ip_blocks[i].status.valid) 1969 continue; 1970 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 1971 if (r) { 1972 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 1973 adev->ip_blocks[i].version->funcs->name, r); 1974 goto init_failed; 1975 } 1976 adev->ip_blocks[i].status.sw = true; 1977 1978 /* need to do gmc hw init early so we can allocate gpu mem */ 1979 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 1980 r = amdgpu_device_vram_scratch_init(adev); 1981 if (r) { 1982 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); 1983 goto init_failed; 1984 } 1985 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 1986 if (r) { 1987 DRM_ERROR("hw_init %d failed %d\n", i, r); 1988 goto init_failed; 1989 } 1990 r = amdgpu_device_wb_init(adev); 1991 if (r) { 1992 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 1993 goto init_failed; 1994 } 1995 adev->ip_blocks[i].status.hw = true; 1996 1997 /* right after GMC hw init, we create CSA */ 1998 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { 1999 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2000 AMDGPU_GEM_DOMAIN_VRAM, 2001 AMDGPU_CSA_SIZE); 2002 if (r) { 2003 DRM_ERROR("allocate CSA failed %d\n", r); 2004 goto init_failed; 2005 } 2006 } 2007 } 2008 } 2009 2010 if (amdgpu_sriov_vf(adev)) 2011 amdgpu_virt_init_data_exchange(adev); 2012 2013 r = amdgpu_ib_pool_init(adev); 2014 if (r) { 2015 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2016 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2017 goto init_failed; 2018 } 2019 2020 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2021 if (r) 2022 goto init_failed; 2023 2024 r = amdgpu_device_ip_hw_init_phase1(adev); 2025 if (r) 2026 goto init_failed; 2027 2028 r = amdgpu_device_fw_loading(adev); 2029 if (r) 2030 goto init_failed; 2031 2032 r = amdgpu_device_ip_hw_init_phase2(adev); 2033 if (r) 2034 goto init_failed; 2035 2036 /* 2037 * retired pages will be loaded from eeprom and reserved here, 2038 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2039 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2040 * for I2C communication which only true at this point. 2041 * recovery_init may fail, but it can free all resources allocated by 2042 * itself and its failure should not stop amdgpu init process. 
2043 * 2044 * Note: theoretically, this should be called before all VRAM allocations 2045 * to protect retired pages from being misused 2046 */ 2047 amdgpu_ras_recovery_init(adev); 2048 2049 if (adev->gmc.xgmi.num_physical_nodes > 1) 2050 amdgpu_xgmi_add_device(adev); 2051 amdgpu_amdkfd_device_init(adev); 2052 2053 amdgpu_fru_get_product_info(adev); 2054 2055 init_failed: 2056 if (amdgpu_sriov_vf(adev)) 2057 amdgpu_virt_release_full_gpu(adev, true); 2058 2059 return r; 2060 } 2061 2062 /** 2063 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2064 * 2065 * @adev: amdgpu_device pointer 2066 * 2067 * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2068 * this function before a GPU reset. If the value is retained after a 2069 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. 2070 */ 2071 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2072 { 2073 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2074 } 2075 2076 /** 2077 * amdgpu_device_check_vram_lost - check if vram is valid 2078 * 2079 * @adev: amdgpu_device pointer 2080 * 2081 * Checks the reset magic value written to the gart pointer in VRAM. 2082 * The driver calls this after a GPU reset to see if the contents of 2083 * VRAM have been lost or not. 2084 * Returns true if vram is lost, false if not. 2085 */ 2086 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2087 { 2088 if (memcmp(adev->gart.ptr, adev->reset_magic, 2089 AMDGPU_RESET_MAGIC_NUM)) 2090 return true; 2091 2092 if (!adev->in_gpu_reset) 2093 return false; 2094 2095 /* 2096 * For all ASICs with baco/mode1 reset, the VRAM is 2097 * always assumed to be lost. 2098 */ 2099 switch (amdgpu_asic_reset_method(adev)) { 2100 case AMD_RESET_METHOD_BACO: 2101 case AMD_RESET_METHOD_MODE1: 2102 return true; 2103 default: 2104 return false; 2105 } 2106 } 2107 2108 /** 2109 * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2110 * 2111 * @adev: amdgpu_device pointer 2112 * @state: clockgating state (gate or ungate) 2113 * 2114 * The list of all the hardware IPs that make up the asic is walked and the 2115 * set_clockgating_state callbacks are run. 2116 * During late init this pass enables clockgating for the hardware IPs; 2117 * during fini or suspend it disables clockgating for them. 2118 * Returns 0 on success, negative error code on failure. 2119 */ 2120 2121 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2122 enum amd_clockgating_state state) 2123 { 2124 int i, j, r; 2125 2126 if (amdgpu_emu_mode == 1) 2127 return 0; 2128 2129 for (j = 0; j < adev->num_ip_blocks; j++) { 2130 i = state == AMD_CG_STATE_GATE ?
j : adev->num_ip_blocks - j - 1; 2131 if (!adev->ip_blocks[i].status.late_initialized) 2132 continue; 2133 /* skip CG for VCE/UVD, it's handled specially */ 2134 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2135 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2136 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2137 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2138 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2139 /* enable clockgating to save power */ 2140 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2141 state); 2142 if (r) { 2143 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2144 adev->ip_blocks[i].version->funcs->name, r); 2145 return r; 2146 } 2147 } 2148 } 2149 2150 return 0; 2151 } 2152 2153 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state) 2154 { 2155 int i, j, r; 2156 2157 if (amdgpu_emu_mode == 1) 2158 return 0; 2159 2160 for (j = 0; j < adev->num_ip_blocks; j++) { 2161 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2162 if (!adev->ip_blocks[i].status.late_initialized) 2163 continue; 2164 /* skip CG for VCE/UVD, it's handled specially */ 2165 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2166 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2167 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2168 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2169 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2170 /* enable powergating to save power */ 2171 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2172 state); 2173 if (r) { 2174 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2175 adev->ip_blocks[i].version->funcs->name, r); 2176 return r; 2177 } 2178 } 2179 } 2180 return 0; 2181 } 2182 2183 static int amdgpu_device_enable_mgpu_fan_boost(void) 2184 { 2185 struct amdgpu_gpu_instance *gpu_ins; 2186 struct amdgpu_device *adev; 2187 int i, ret = 0; 2188 2189 mutex_lock(&mgpu_info.mutex); 2190 2191 /* 2192 * MGPU fan boost feature should be enabled 2193 * only when there are two or more dGPUs in 2194 * the system 2195 */ 2196 if (mgpu_info.num_dgpu < 2) 2197 goto out; 2198 2199 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2200 gpu_ins = &(mgpu_info.gpu_ins[i]); 2201 adev = gpu_ins->adev; 2202 if (!(adev->flags & AMD_IS_APU) && 2203 !gpu_ins->mgpu_fan_enabled && 2204 adev->powerplay.pp_funcs && 2205 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { 2206 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2207 if (ret) 2208 break; 2209 2210 gpu_ins->mgpu_fan_enabled = 1; 2211 } 2212 } 2213 2214 out: 2215 mutex_unlock(&mgpu_info.mutex); 2216 2217 return ret; 2218 } 2219 2220 /** 2221 * amdgpu_device_ip_late_init - run late init for hardware IPs 2222 * 2223 * @adev: amdgpu_device pointer 2224 * 2225 * Late initialization pass for hardware IPs. The list of all the hardware 2226 * IPs that make up the asic is walked and the late_init callbacks are run. 2227 * late_init covers any special initialization that an IP requires 2228 * after all of the have been initialized or something that needs to happen 2229 * late in the init process. 2230 * Returns 0 on success, negative error code on failure. 
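 *
 * A clarifying note grounded in the function body below: besides running the
 * per-IP late_init callbacks, this is also where clockgating and powergating
 * are enabled, the VRAM reset magic is written and, on multi-GPU systems,
 * mGPU fan boost and the XGMI p-state are set up.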
2231 */ 2232 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2233 { 2234 struct amdgpu_gpu_instance *gpu_instance; 2235 int i = 0, r; 2236 2237 for (i = 0; i < adev->num_ip_blocks; i++) { 2238 if (!adev->ip_blocks[i].status.hw) 2239 continue; 2240 if (adev->ip_blocks[i].version->funcs->late_init) { 2241 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2242 if (r) { 2243 DRM_ERROR("late_init of IP block <%s> failed %d\n", 2244 adev->ip_blocks[i].version->funcs->name, r); 2245 return r; 2246 } 2247 } 2248 adev->ip_blocks[i].status.late_initialized = true; 2249 } 2250 2251 amdgpu_ras_set_error_query_ready(adev, true); 2252 2253 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2254 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2255 2256 amdgpu_device_fill_reset_magic(adev); 2257 2258 r = amdgpu_device_enable_mgpu_fan_boost(); 2259 if (r) 2260 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2261 2262 2263 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2264 mutex_lock(&mgpu_info.mutex); 2265 2266 /* 2267 * Reset the device p-state to low, as the device boots in the high state. 2268 * 2269 * This should be performed only after all devices from the same 2270 * hive have been initialized. 2271 * 2272 * However, the number of devices in the hive is not known in advance; 2273 * it is counted one by one as each device initializes. 2274 * 2275 * So we wait until all XGMI-interlinked devices have initialized. 2276 * This may add some delay, as those devices may come from 2277 * different hives, but that should be acceptable. 2278 */ 2279 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2280 for (i = 0; i < mgpu_info.num_gpu; i++) { 2281 gpu_instance = &(mgpu_info.gpu_ins[i]); 2282 if (gpu_instance->adev->flags & AMD_IS_APU) 2283 continue; 2284 2285 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2286 AMDGPU_XGMI_PSTATE_MIN); 2287 if (r) { 2288 DRM_ERROR("pstate setting failed (%d).\n", r); 2289 break; 2290 } 2291 } 2292 } 2293 2294 mutex_unlock(&mgpu_info.mutex); 2295 } 2296 2297 return 0; 2298 } 2299 2300 /** 2301 * amdgpu_device_ip_fini - run fini for hardware IPs 2302 * 2303 * @adev: amdgpu_device pointer 2304 * 2305 * Main teardown pass for hardware IPs. The list of all the hardware 2306 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2307 * are run. hw_fini tears down the hardware associated with each IP 2308 * and sw_fini tears down any software state associated with each IP. 2309 * Returns 0 on success, negative error code on failure.
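 *
 * As the code below shows, the SMC block is disabled first; the remaining
 * blocks then run hw_fini and sw_fini in reverse order of initialization.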
2310 */ 2311 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2312 { 2313 int i, r; 2314 2315 amdgpu_ras_pre_fini(adev); 2316 2317 if (adev->gmc.xgmi.num_physical_nodes > 1) 2318 amdgpu_xgmi_remove_device(adev); 2319 2320 amdgpu_amdkfd_device_fini(adev); 2321 2322 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2323 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2324 2325 /* need to disable SMC first */ 2326 for (i = 0; i < adev->num_ip_blocks; i++) { 2327 if (!adev->ip_blocks[i].status.hw) 2328 continue; 2329 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2330 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2331 /* XXX handle errors */ 2332 if (r) { 2333 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2334 adev->ip_blocks[i].version->funcs->name, r); 2335 } 2336 adev->ip_blocks[i].status.hw = false; 2337 break; 2338 } 2339 } 2340 2341 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2342 if (!adev->ip_blocks[i].status.hw) 2343 continue; 2344 2345 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2346 /* XXX handle errors */ 2347 if (r) { 2348 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2349 adev->ip_blocks[i].version->funcs->name, r); 2350 } 2351 2352 adev->ip_blocks[i].status.hw = false; 2353 } 2354 2355 2356 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2357 if (!adev->ip_blocks[i].status.sw) 2358 continue; 2359 2360 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2361 amdgpu_ucode_free_bo(adev); 2362 amdgpu_free_static_csa(&adev->virt.csa_obj); 2363 amdgpu_device_wb_fini(adev); 2364 amdgpu_device_vram_scratch_fini(adev); 2365 amdgpu_ib_pool_fini(adev); 2366 } 2367 2368 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2369 /* XXX handle errors */ 2370 if (r) { 2371 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2372 adev->ip_blocks[i].version->funcs->name, r); 2373 } 2374 adev->ip_blocks[i].status.sw = false; 2375 adev->ip_blocks[i].status.valid = false; 2376 } 2377 2378 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2379 if (!adev->ip_blocks[i].status.late_initialized) 2380 continue; 2381 if (adev->ip_blocks[i].version->funcs->late_fini) 2382 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2383 adev->ip_blocks[i].status.late_initialized = false; 2384 } 2385 2386 amdgpu_ras_fini(adev); 2387 2388 if (amdgpu_sriov_vf(adev)) 2389 if (amdgpu_virt_release_full_gpu(adev, false)) 2390 DRM_ERROR("failed to release exclusive mode on fini\n"); 2391 2392 return 0; 2393 } 2394 2395 /** 2396 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2397 * 2398 * @work: work_struct. 
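 *
 * Deferred work item that runs the IB ring tests once initialization has
 * settled; a failure is only logged, not propagated.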
2399 */ 2400 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2401 { 2402 struct amdgpu_device *adev = 2403 container_of(work, struct amdgpu_device, delayed_init_work.work); 2404 int r; 2405 2406 r = amdgpu_ib_ring_tests(adev); 2407 if (r) 2408 DRM_ERROR("ib ring test failed (%d).\n", r); 2409 } 2410 2411 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2412 { 2413 struct amdgpu_device *adev = 2414 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2415 2416 mutex_lock(&adev->gfx.gfx_off_mutex); 2417 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 2418 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2419 adev->gfx.gfx_off_state = true; 2420 } 2421 mutex_unlock(&adev->gfx.gfx_off_mutex); 2422 } 2423 2424 /** 2425 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2426 * 2427 * @adev: amdgpu_device pointer 2428 * 2429 * Main suspend function for hardware IPs. The list of all the hardware 2430 * IPs that make up the asic is walked, clockgating is disabled and the 2431 * suspend callbacks are run. suspend puts the hardware and software state 2432 * in each IP into a state suitable for suspend. 2433 * Returns 0 on success, negative error code on failure. 2434 */ 2435 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2436 { 2437 int i, r; 2438 2439 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2440 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2441 2442 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2443 if (!adev->ip_blocks[i].status.valid) 2444 continue; 2445 /* displays are handled separately */ 2446 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2447 /* XXX handle errors */ 2448 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2449 /* XXX handle errors */ 2450 if (r) { 2451 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2452 adev->ip_blocks[i].version->funcs->name, r); 2453 return r; 2454 } 2455 adev->ip_blocks[i].status.hw = false; 2456 } 2457 } 2458 2459 return 0; 2460 } 2461 2462 /** 2463 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2464 * 2465 * @adev: amdgpu_device pointer 2466 * 2467 * Main suspend function for hardware IPs. The list of all the hardware 2468 * IPs that make up the asic is walked, clockgating is disabled and the 2469 * suspend callbacks are run. suspend puts the hardware and software state 2470 * in each IP into a state suitable for suspend. 2471 * Returns 0 on success, negative error code on failure. 
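 *
 * This phase covers every block except the display IPs, which were already
 * suspended in phase 1; the PSP block is also skipped when a RAS interrupt
 * (err_event_athub) has been triggered.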
2472 */ 2473 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2474 { 2475 int i, r; 2476 2477 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2478 if (!adev->ip_blocks[i].status.valid) 2479 continue; 2480 /* displays are handled in phase1 */ 2481 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2482 continue; 2483 /* PSP lost connection when err_event_athub occurs */ 2484 if (amdgpu_ras_intr_triggered() && 2485 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2486 adev->ip_blocks[i].status.hw = false; 2487 continue; 2488 } 2489 /* XXX handle errors */ 2490 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2491 /* XXX handle errors */ 2492 if (r) { 2493 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2494 adev->ip_blocks[i].version->funcs->name, r); 2495 } 2496 adev->ip_blocks[i].status.hw = false; 2497 /* handle putting the SMC in the appropriate state */ 2498 if(!amdgpu_sriov_vf(adev)){ 2499 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2500 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 2501 if (r) { 2502 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 2503 adev->mp1_state, r); 2504 return r; 2505 } 2506 } 2507 } 2508 adev->ip_blocks[i].status.hw = false; 2509 } 2510 2511 return 0; 2512 } 2513 2514 /** 2515 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2516 * 2517 * @adev: amdgpu_device pointer 2518 * 2519 * Main suspend function for hardware IPs. The list of all the hardware 2520 * IPs that make up the asic is walked, clockgating is disabled and the 2521 * suspend callbacks are run. suspend puts the hardware and software state 2522 * in each IP into a state suitable for suspend. 2523 * Returns 0 on success, negative error code on failure. 2524 */ 2525 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2526 { 2527 int r; 2528 2529 if (amdgpu_sriov_vf(adev)) 2530 amdgpu_virt_request_full_gpu(adev, false); 2531 2532 r = amdgpu_device_ip_suspend_phase1(adev); 2533 if (r) 2534 return r; 2535 r = amdgpu_device_ip_suspend_phase2(adev); 2536 2537 if (amdgpu_sriov_vf(adev)) 2538 amdgpu_virt_release_full_gpu(adev, false); 2539 2540 return r; 2541 } 2542 2543 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2544 { 2545 int i, r; 2546 2547 static enum amd_ip_block_type ip_order[] = { 2548 AMD_IP_BLOCK_TYPE_GMC, 2549 AMD_IP_BLOCK_TYPE_COMMON, 2550 AMD_IP_BLOCK_TYPE_PSP, 2551 AMD_IP_BLOCK_TYPE_IH, 2552 }; 2553 2554 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2555 int j; 2556 struct amdgpu_ip_block *block; 2557 2558 for (j = 0; j < adev->num_ip_blocks; j++) { 2559 block = &adev->ip_blocks[j]; 2560 2561 block->status.hw = false; 2562 if (block->version->type != ip_order[i] || 2563 !block->status.valid) 2564 continue; 2565 2566 r = block->version->funcs->hw_init(adev); 2567 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2568 if (r) 2569 return r; 2570 block->status.hw = true; 2571 } 2572 } 2573 2574 return 0; 2575 } 2576 2577 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2578 { 2579 int i, r; 2580 2581 static enum amd_ip_block_type ip_order[] = { 2582 AMD_IP_BLOCK_TYPE_SMC, 2583 AMD_IP_BLOCK_TYPE_DCE, 2584 AMD_IP_BLOCK_TYPE_GFX, 2585 AMD_IP_BLOCK_TYPE_SDMA, 2586 AMD_IP_BLOCK_TYPE_UVD, 2587 AMD_IP_BLOCK_TYPE_VCE, 2588 AMD_IP_BLOCK_TYPE_VCN 2589 }; 2590 2591 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2592 int j; 2593 struct amdgpu_ip_block *block; 2594 2595 for (j = 0; j < adev->num_ip_blocks; j++) { 2596 
block = &adev->ip_blocks[j]; 2597 2598 if (block->version->type != ip_order[i] || 2599 !block->status.valid || 2600 block->status.hw) 2601 continue; 2602 2603 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 2604 r = block->version->funcs->resume(adev); 2605 else 2606 r = block->version->funcs->hw_init(adev); 2607 2608 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2609 if (r) 2610 return r; 2611 block->status.hw = true; 2612 } 2613 } 2614 2615 return 0; 2616 } 2617 2618 /** 2619 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2620 * 2621 * @adev: amdgpu_device pointer 2622 * 2623 * First resume function for hardware IPs. The list of all the hardware 2624 * IPs that make up the asic is walked and the resume callbacks are run for 2625 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2626 * after a suspend and updates the software state as necessary. This 2627 * function is also used for restoring the GPU after a GPU reset. 2628 * Returns 0 on success, negative error code on failure. 2629 */ 2630 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 2631 { 2632 int i, r; 2633 2634 for (i = 0; i < adev->num_ip_blocks; i++) { 2635 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2636 continue; 2637 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2638 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2639 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2640 2641 r = adev->ip_blocks[i].version->funcs->resume(adev); 2642 if (r) { 2643 DRM_ERROR("resume of IP block <%s> failed %d\n", 2644 adev->ip_blocks[i].version->funcs->name, r); 2645 return r; 2646 } 2647 adev->ip_blocks[i].status.hw = true; 2648 } 2649 } 2650 2651 return 0; 2652 } 2653 2654 /** 2655 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 2656 * 2657 * @adev: amdgpu_device pointer 2658 * 2659 * First resume function for hardware IPs. The list of all the hardware 2660 * IPs that make up the asic is walked and the resume callbacks are run for 2661 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 2662 * functional state after a suspend and updates the software state as 2663 * necessary. This function is also used for restoring the GPU after a GPU 2664 * reset. 2665 * Returns 0 on success, negative error code on failure. 2666 */ 2667 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2668 { 2669 int i, r; 2670 2671 for (i = 0; i < adev->num_ip_blocks; i++) { 2672 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2673 continue; 2674 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2675 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2676 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 2677 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 2678 continue; 2679 r = adev->ip_blocks[i].version->funcs->resume(adev); 2680 if (r) { 2681 DRM_ERROR("resume of IP block <%s> failed %d\n", 2682 adev->ip_blocks[i].version->funcs->name, r); 2683 return r; 2684 } 2685 adev->ip_blocks[i].status.hw = true; 2686 } 2687 2688 return 0; 2689 } 2690 2691 /** 2692 * amdgpu_device_ip_resume - run resume for hardware IPs 2693 * 2694 * @adev: amdgpu_device pointer 2695 * 2696 * Main resume function for hardware IPs. 
The hardware IPs 2697 * are split into two resume functions because they are 2698 * also used in recovering from a GPU reset, and some additional 2699 * steps need to be taken between them. In this case (S3/S4) they are 2700 * run sequentially. 2701 * Returns 0 on success, negative error code on failure. 2702 */ 2703 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2704 { 2705 int r; 2706 2707 r = amdgpu_device_ip_resume_phase1(adev); 2708 if (r) 2709 return r; 2710 2711 r = amdgpu_device_fw_loading(adev); 2712 if (r) 2713 return r; 2714 2715 r = amdgpu_device_ip_resume_phase2(adev); 2716 2717 return r; 2718 } 2719 2720 /** 2721 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2722 * 2723 * @adev: amdgpu_device pointer 2724 * 2725 * Query the VBIOS data tables to determine if the board supports SR-IOV. 2726 */ 2727 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2728 { 2729 if (amdgpu_sriov_vf(adev)) { 2730 if (adev->is_atom_fw) { 2731 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2732 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2733 } else { 2734 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2735 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2736 } 2737 2738 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2739 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2740 } 2741 } 2742 2743 /** 2744 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2745 * 2746 * @asic_type: AMD asic type 2747 * 2748 * Check if there is DC (new modesetting infrastructure) support for an asic. 2749 * Returns true if DC has support, false if not. 2750 */ 2751 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2752 { 2753 switch (asic_type) { 2754 #if defined(CONFIG_DRM_AMD_DC) 2755 case CHIP_BONAIRE: 2756 case CHIP_KAVERI: 2757 case CHIP_KABINI: 2758 case CHIP_MULLINS: 2759 /* 2760 * We have systems in the wild with these ASICs that require 2761 * LVDS and VGA support which is not supported with DC. 2762 * 2763 * Fall back to the non-DC driver here by default so as not to 2764 * cause regressions.
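 *
 * Users who specifically want DC on these parts can still opt in (the
 * amdgpu_dc module option); the check below only honours an explicitly
 * positive value.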
2765 */ 2766 return amdgpu_dc > 0; 2767 case CHIP_HAWAII: 2768 case CHIP_CARRIZO: 2769 case CHIP_STONEY: 2770 case CHIP_POLARIS10: 2771 case CHIP_POLARIS11: 2772 case CHIP_POLARIS12: 2773 case CHIP_VEGAM: 2774 case CHIP_TONGA: 2775 case CHIP_FIJI: 2776 case CHIP_VEGA10: 2777 case CHIP_VEGA12: 2778 case CHIP_VEGA20: 2779 #if defined(CONFIG_DRM_AMD_DC_DCN) 2780 case CHIP_RAVEN: 2781 case CHIP_NAVI10: 2782 case CHIP_NAVI14: 2783 case CHIP_NAVI12: 2784 case CHIP_RENOIR: 2785 #endif 2786 return amdgpu_dc != 0; 2787 #endif 2788 default: 2789 if (amdgpu_dc > 0) 2790 DRM_INFO("Display Core has been requested via kernel parameter " 2791 "but isn't supported by ASIC, ignoring\n"); 2792 return false; 2793 } 2794 } 2795 2796 /** 2797 * amdgpu_device_has_dc_support - check if dc is supported 2798 * 2799 * @adev: amdgpu_device_pointer 2800 * 2801 * Returns true for supported, false for not supported 2802 */ 2803 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2804 { 2805 if (amdgpu_sriov_vf(adev)) 2806 return false; 2807 2808 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2809 } 2810 2811 2812 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 2813 { 2814 struct amdgpu_device *adev = 2815 container_of(__work, struct amdgpu_device, xgmi_reset_work); 2816 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); 2817 2818 /* It's a bug to not have a hive within this function */ 2819 if (WARN_ON(!hive)) 2820 return; 2821 2822 /* 2823 * Use task barrier to synchronize all xgmi reset works across the 2824 * hive. task_barrier_enter and task_barrier_exit will block 2825 * until all the threads running the xgmi reset works reach 2826 * those points. task_barrier_full will do both blocks. 2827 */ 2828 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 2829 2830 task_barrier_enter(&hive->tb); 2831 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); 2832 2833 if (adev->asic_reset_res) 2834 goto fail; 2835 2836 task_barrier_exit(&hive->tb); 2837 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); 2838 2839 if (adev->asic_reset_res) 2840 goto fail; 2841 2842 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) 2843 adev->mmhub.funcs->reset_ras_error_count(adev); 2844 } else { 2845 2846 task_barrier_full(&hive->tb); 2847 adev->asic_reset_res = amdgpu_asic_reset(adev); 2848 } 2849 2850 fail: 2851 if (adev->asic_reset_res) 2852 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 2853 adev->asic_reset_res, adev->ddev->unique); 2854 } 2855 2856 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 2857 { 2858 char *input = amdgpu_lockup_timeout; 2859 char *timeout_setting = NULL; 2860 int index = 0; 2861 long timeout; 2862 int ret = 0; 2863 2864 /* 2865 * By default timeout for non compute jobs is 10000. 2866 * And there is no timeout enforced on compute jobs. 2867 * In SR-IOV or passthrough mode, timeout for compute 2868 * jobs are 60000 by default. 
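 *
 * As an illustration of the option format parsed below, a lockup_timeout
 * string such as "10000,60000,10000,10000" sets the gfx, compute, sdma and
 * video timeouts (in ms) in that order; a value of 0 keeps the default for
 * that queue, a negative value selects an infinite timeout, and a single
 * value applies to all non-compute queues.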
2869 */ 2870 adev->gfx_timeout = msecs_to_jiffies(10000); 2871 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2872 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2873 adev->compute_timeout = msecs_to_jiffies(60000); 2874 else 2875 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; 2876 2877 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2878 while ((timeout_setting = strsep(&input, ",")) && 2879 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2880 ret = kstrtol(timeout_setting, 0, &timeout); 2881 if (ret) 2882 return ret; 2883 2884 if (timeout == 0) { 2885 index++; 2886 continue; 2887 } else if (timeout < 0) { 2888 timeout = MAX_SCHEDULE_TIMEOUT; 2889 } else { 2890 timeout = msecs_to_jiffies(timeout); 2891 } 2892 2893 switch (index++) { 2894 case 0: 2895 adev->gfx_timeout = timeout; 2896 break; 2897 case 1: 2898 adev->compute_timeout = timeout; 2899 break; 2900 case 2: 2901 adev->sdma_timeout = timeout; 2902 break; 2903 case 3: 2904 adev->video_timeout = timeout; 2905 break; 2906 default: 2907 break; 2908 } 2909 } 2910 /* 2911 * There is only one value specified and 2912 * it should apply to all non-compute jobs. 2913 */ 2914 if (index == 1) { 2915 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2916 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2917 adev->compute_timeout = adev->gfx_timeout; 2918 } 2919 } 2920 2921 return ret; 2922 } 2923 2924 static const struct attribute *amdgpu_dev_attributes[] = { 2925 &dev_attr_product_name.attr, 2926 &dev_attr_product_number.attr, 2927 &dev_attr_serial_number.attr, 2928 &dev_attr_pcie_replay_count.attr, 2929 NULL 2930 }; 2931 2932 /** 2933 * amdgpu_device_init - initialize the driver 2934 * 2935 * @adev: amdgpu_device pointer 2936 * @ddev: drm dev pointer 2937 * @pdev: pci dev pointer 2938 * @flags: driver flags 2939 * 2940 * Initializes the driver info and hw (all asics). 2941 * Returns 0 for success or an error on failure. 2942 * Called at driver startup. 
2943 */ 2944 int amdgpu_device_init(struct amdgpu_device *adev, 2945 struct drm_device *ddev, 2946 struct pci_dev *pdev, 2947 uint32_t flags) 2948 { 2949 int r, i; 2950 bool boco = false; 2951 u32 max_MBps; 2952 2953 adev->shutdown = false; 2954 adev->dev = &pdev->dev; 2955 adev->ddev = ddev; 2956 adev->pdev = pdev; 2957 adev->flags = flags; 2958 2959 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 2960 adev->asic_type = amdgpu_force_asic_type; 2961 else 2962 adev->asic_type = flags & AMD_ASIC_MASK; 2963 2964 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2965 if (amdgpu_emu_mode == 1) 2966 adev->usec_timeout *= 10; 2967 adev->gmc.gart_size = 512 * 1024 * 1024; 2968 adev->accel_working = false; 2969 adev->num_rings = 0; 2970 adev->mman.buffer_funcs = NULL; 2971 adev->mman.buffer_funcs_ring = NULL; 2972 adev->vm_manager.vm_pte_funcs = NULL; 2973 adev->vm_manager.vm_pte_num_scheds = 0; 2974 adev->gmc.gmc_funcs = NULL; 2975 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2976 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2977 2978 adev->smc_rreg = &amdgpu_invalid_rreg; 2979 adev->smc_wreg = &amdgpu_invalid_wreg; 2980 adev->pcie_rreg = &amdgpu_invalid_rreg; 2981 adev->pcie_wreg = &amdgpu_invalid_wreg; 2982 adev->pciep_rreg = &amdgpu_invalid_rreg; 2983 adev->pciep_wreg = &amdgpu_invalid_wreg; 2984 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 2985 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 2986 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2987 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2988 adev->didt_rreg = &amdgpu_invalid_rreg; 2989 adev->didt_wreg = &amdgpu_invalid_wreg; 2990 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2991 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2992 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2993 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2994 2995 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2996 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2997 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2998 2999 /* mutex initialization are all done here so we 3000 * can recall function without having locking issues */ 3001 atomic_set(&adev->irq.ih.lock, 0); 3002 mutex_init(&adev->firmware.mutex); 3003 mutex_init(&adev->pm.mutex); 3004 mutex_init(&adev->gfx.gpu_clock_mutex); 3005 mutex_init(&adev->srbm_mutex); 3006 mutex_init(&adev->gfx.pipe_reserve_mutex); 3007 mutex_init(&adev->gfx.gfx_off_mutex); 3008 mutex_init(&adev->grbm_idx_mutex); 3009 mutex_init(&adev->mn_lock); 3010 mutex_init(&adev->virt.vf_errors.lock); 3011 hash_init(adev->mn_hash); 3012 mutex_init(&adev->lock_reset); 3013 mutex_init(&adev->psp.mutex); 3014 mutex_init(&adev->notifier_lock); 3015 3016 r = amdgpu_device_check_arguments(adev); 3017 if (r) 3018 return r; 3019 3020 spin_lock_init(&adev->mmio_idx_lock); 3021 spin_lock_init(&adev->smc_idx_lock); 3022 spin_lock_init(&adev->pcie_idx_lock); 3023 spin_lock_init(&adev->uvd_ctx_idx_lock); 3024 spin_lock_init(&adev->didt_idx_lock); 3025 spin_lock_init(&adev->gc_cac_idx_lock); 3026 spin_lock_init(&adev->se_cac_idx_lock); 3027 spin_lock_init(&adev->audio_endpt_idx_lock); 3028 spin_lock_init(&adev->mm_stats.lock); 3029 3030 INIT_LIST_HEAD(&adev->shadow_list); 3031 mutex_init(&adev->shadow_list_lock); 3032 3033 INIT_DELAYED_WORK(&adev->delayed_init_work, 3034 amdgpu_device_delayed_init_work_handler); 3035 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3036 amdgpu_device_delay_enable_gfx_off); 3037 3038 
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3039 3040 adev->gfx.gfx_off_req_count = 1; 3041 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3042 3043 atomic_set(&adev->throttling_logging_enabled, 1); 3044 /* 3045 * If throttling continues, logging will be performed every minute 3046 * to avoid log flooding. "-1" is subtracted since the thermal 3047 * throttling interrupt comes every second. Thus, the total logging 3048 * interval is 59 seconds(retelimited printk interval) + 1(waiting 3049 * for throttling interrupt) = 60 seconds. 3050 */ 3051 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); 3052 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); 3053 3054 /* Registers mapping */ 3055 /* TODO: block userspace mapping of io register */ 3056 if (adev->asic_type >= CHIP_BONAIRE) { 3057 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3058 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3059 } else { 3060 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3061 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3062 } 3063 3064 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3065 if (adev->rmmio == NULL) { 3066 return -ENOMEM; 3067 } 3068 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3069 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3070 3071 /* io port mapping */ 3072 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 3073 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 3074 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 3075 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 3076 break; 3077 } 3078 } 3079 if (adev->rio_mem == NULL) 3080 DRM_INFO("PCI I/O BAR is not found.\n"); 3081 3082 /* enable PCIE atomic ops */ 3083 r = pci_enable_atomic_ops_to_root(adev->pdev, 3084 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3085 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3086 if (r) { 3087 adev->have_atomics_support = false; 3088 DRM_INFO("PCIE atomic ops is not supported\n"); 3089 } else { 3090 adev->have_atomics_support = true; 3091 } 3092 3093 amdgpu_device_get_pcie_info(adev); 3094 3095 if (amdgpu_mcbp) 3096 DRM_INFO("MCBP is enabled\n"); 3097 3098 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) 3099 adev->enable_mes = true; 3100 3101 /* detect hw virtualization here */ 3102 amdgpu_detect_virtualization(adev); 3103 3104 r = amdgpu_device_get_job_timeout_settings(adev); 3105 if (r) { 3106 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3107 return r; 3108 } 3109 3110 /* early init functions */ 3111 r = amdgpu_device_ip_early_init(adev); 3112 if (r) 3113 return r; 3114 3115 /* doorbell bar mapping and doorbell index init*/ 3116 amdgpu_device_doorbell_init(adev); 3117 3118 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3119 /* this will fail for cards that aren't VGA class devices, just 3120 * ignore it */ 3121 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 3122 3123 if (amdgpu_device_supports_boco(ddev)) 3124 boco = true; 3125 if (amdgpu_has_atpx() && 3126 (amdgpu_is_atpx_hybrid() || 3127 amdgpu_has_atpx_dgpu_power_cntl()) && 3128 !pci_is_thunderbolt_attached(adev->pdev)) 3129 vga_switcheroo_register_client(adev->pdev, 3130 &amdgpu_switcheroo_ops, boco); 3131 if (boco) 3132 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3133 3134 if (amdgpu_emu_mode == 1) { 3135 /* post the asic on emulation mode */ 3136 emu_soc_asic_init(adev); 3137 goto fence_driver_init; 3138 } 
3139 3140 /* detect if we are with an SRIOV vbios */ 3141 amdgpu_device_detect_sriov_bios(adev); 3142 3143 /* check if we need to reset the asic 3144 * E.g., driver was not cleanly unloaded previously, etc. 3145 */ 3146 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3147 r = amdgpu_asic_reset(adev); 3148 if (r) { 3149 dev_err(adev->dev, "asic reset on init failed\n"); 3150 goto failed; 3151 } 3152 } 3153 3154 /* Post card if necessary */ 3155 if (amdgpu_device_need_post(adev)) { 3156 if (!adev->bios) { 3157 dev_err(adev->dev, "no vBIOS found\n"); 3158 r = -EINVAL; 3159 goto failed; 3160 } 3161 DRM_INFO("GPU posting now...\n"); 3162 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3163 if (r) { 3164 dev_err(adev->dev, "gpu post error!\n"); 3165 goto failed; 3166 } 3167 } 3168 3169 if (adev->is_atom_fw) { 3170 /* Initialize clocks */ 3171 r = amdgpu_atomfirmware_get_clock_info(adev); 3172 if (r) { 3173 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3174 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3175 goto failed; 3176 } 3177 } else { 3178 /* Initialize clocks */ 3179 r = amdgpu_atombios_get_clock_info(adev); 3180 if (r) { 3181 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3182 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3183 goto failed; 3184 } 3185 /* init i2c buses */ 3186 if (!amdgpu_device_has_dc_support(adev)) 3187 amdgpu_atombios_i2c_init(adev); 3188 } 3189 3190 fence_driver_init: 3191 /* Fence driver */ 3192 r = amdgpu_fence_driver_init(adev); 3193 if (r) { 3194 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 3195 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3196 goto failed; 3197 } 3198 3199 /* init the mode config */ 3200 drm_mode_config_init(adev->ddev); 3201 3202 r = amdgpu_device_ip_init(adev); 3203 if (r) { 3204 /* failed in exclusive mode due to timeout */ 3205 if (amdgpu_sriov_vf(adev) && 3206 !amdgpu_sriov_runtime(adev) && 3207 amdgpu_virt_mmio_blocked(adev) && 3208 !amdgpu_virt_wait_reset(adev)) { 3209 dev_err(adev->dev, "VF exclusive mode timeout\n"); 3210 /* Don't send request since VF is inactive. */ 3211 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 3212 adev->virt.ops = NULL; 3213 r = -EAGAIN; 3214 goto failed; 3215 } 3216 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3217 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3218 goto failed; 3219 } 3220 3221 dev_info(adev->dev, 3222 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3223 adev->gfx.config.max_shader_engines, 3224 adev->gfx.config.max_sh_per_se, 3225 adev->gfx.config.max_cu_per_sh, 3226 adev->gfx.cu_info.number); 3227 3228 adev->accel_working = true; 3229 3230 amdgpu_vm_check_compute_bug(adev); 3231 3232 /* Initialize the buffer migration limit. */ 3233 if (amdgpu_moverate >= 0) 3234 max_MBps = amdgpu_moverate; 3235 else 3236 max_MBps = 8; /* Allow 8 MB/s. */ 3237 /* Get a log2 for easy divisions. 
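For example, the default of 8 MB/s gives log2_max_MBps = ilog2(8) = 3, so the later bandwidth accounting can divide by shifting.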
*/ 3238 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3239 3240 amdgpu_fbdev_init(adev); 3241 3242 r = amdgpu_pm_sysfs_init(adev); 3243 if (r) { 3244 adev->pm_sysfs_en = false; 3245 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 3246 } else 3247 adev->pm_sysfs_en = true; 3248 3249 r = amdgpu_ucode_sysfs_init(adev); 3250 if (r) { 3251 adev->ucode_sysfs_en = false; 3252 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3253 } else 3254 adev->ucode_sysfs_en = true; 3255 3256 if ((amdgpu_testing & 1)) { 3257 if (adev->accel_working) 3258 amdgpu_test_moves(adev); 3259 else 3260 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 3261 } 3262 if (amdgpu_benchmarking) { 3263 if (adev->accel_working) 3264 amdgpu_benchmark(adev, amdgpu_benchmarking); 3265 else 3266 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 3267 } 3268 3269 /* 3270 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3271 * Otherwise the mgpu fan boost feature will be skipped due to the 3272 * gpu instance is counted less. 3273 */ 3274 amdgpu_register_gpu_instance(adev); 3275 3276 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3277 * explicit gating rather than handling it automatically. 3278 */ 3279 r = amdgpu_device_ip_late_init(adev); 3280 if (r) { 3281 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3282 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3283 goto failed; 3284 } 3285 3286 /* must succeed. */ 3287 amdgpu_ras_resume(adev); 3288 3289 queue_delayed_work(system_wq, &adev->delayed_init_work, 3290 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3291 3292 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 3293 if (r) { 3294 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 3295 return r; 3296 } 3297 3298 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3299 r = amdgpu_pmu_init(adev); 3300 if (r) 3301 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 3302 3303 return 0; 3304 3305 failed: 3306 amdgpu_vf_error_trans_all(adev); 3307 if (boco) 3308 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3309 3310 return r; 3311 } 3312 3313 /** 3314 * amdgpu_device_fini - tear down the driver 3315 * 3316 * @adev: amdgpu_device pointer 3317 * 3318 * Tear down the driver info (all asics). 3319 * Called at driver shutdown. 
3320 */ 3321 void amdgpu_device_fini(struct amdgpu_device *adev) 3322 { 3323 int r; 3324 3325 DRM_INFO("amdgpu: finishing device.\n"); 3326 flush_delayed_work(&adev->delayed_init_work); 3327 adev->shutdown = true; 3328 3329 /* make sure IB test finished before entering exclusive mode 3330 * to avoid preemption on IB test 3331 * */ 3332 if (amdgpu_sriov_vf(adev)) 3333 amdgpu_virt_request_full_gpu(adev, false); 3334 3335 /* disable all interrupts */ 3336 amdgpu_irq_disable_all(adev); 3337 if (adev->mode_info.mode_config_initialized){ 3338 if (!amdgpu_device_has_dc_support(adev)) 3339 drm_helper_force_disable_all(adev->ddev); 3340 else 3341 drm_atomic_helper_shutdown(adev->ddev); 3342 } 3343 amdgpu_fence_driver_fini(adev); 3344 if (adev->pm_sysfs_en) 3345 amdgpu_pm_sysfs_fini(adev); 3346 amdgpu_fbdev_fini(adev); 3347 r = amdgpu_device_ip_fini(adev); 3348 if (adev->firmware.gpu_info_fw) { 3349 release_firmware(adev->firmware.gpu_info_fw); 3350 adev->firmware.gpu_info_fw = NULL; 3351 } 3352 adev->accel_working = false; 3353 /* free i2c buses */ 3354 if (!amdgpu_device_has_dc_support(adev)) 3355 amdgpu_i2c_fini(adev); 3356 3357 if (amdgpu_emu_mode != 1) 3358 amdgpu_atombios_fini(adev); 3359 3360 kfree(adev->bios); 3361 adev->bios = NULL; 3362 if (amdgpu_has_atpx() && 3363 (amdgpu_is_atpx_hybrid() || 3364 amdgpu_has_atpx_dgpu_power_cntl()) && 3365 !pci_is_thunderbolt_attached(adev->pdev)) 3366 vga_switcheroo_unregister_client(adev->pdev); 3367 if (amdgpu_device_supports_boco(adev->ddev)) 3368 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3369 vga_client_register(adev->pdev, NULL, NULL, NULL); 3370 if (adev->rio_mem) 3371 pci_iounmap(adev->pdev, adev->rio_mem); 3372 adev->rio_mem = NULL; 3373 iounmap(adev->rmmio); 3374 adev->rmmio = NULL; 3375 amdgpu_device_doorbell_fini(adev); 3376 3377 if (adev->ucode_sysfs_en) 3378 amdgpu_ucode_sysfs_fini(adev); 3379 3380 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 3381 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3382 amdgpu_pmu_fini(adev); 3383 if (adev->discovery_bin) 3384 amdgpu_discovery_fini(adev); 3385 } 3386 3387 3388 /* 3389 * Suspend & resume. 3390 */ 3391 /** 3392 * amdgpu_device_suspend - initiate device suspend 3393 * 3394 * @dev: drm dev pointer 3395 * @suspend: suspend state 3396 * @fbcon : notify the fbdev of suspend 3397 * 3398 * Puts the hw in the suspend state (all asics). 3399 * Returns 0 for success or an error on failure. 3400 * Called at driver suspend. 
3401 */ 3402 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 3403 { 3404 struct amdgpu_device *adev; 3405 struct drm_crtc *crtc; 3406 struct drm_connector *connector; 3407 struct drm_connector_list_iter iter; 3408 int r; 3409 3410 if (dev == NULL || dev->dev_private == NULL) { 3411 return -ENODEV; 3412 } 3413 3414 adev = dev->dev_private; 3415 3416 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3417 return 0; 3418 3419 adev->in_suspend = true; 3420 drm_kms_helper_poll_disable(dev); 3421 3422 if (fbcon) 3423 amdgpu_fbdev_set_suspend(adev, 1); 3424 3425 cancel_delayed_work_sync(&adev->delayed_init_work); 3426 3427 if (!amdgpu_device_has_dc_support(adev)) { 3428 /* turn off display hw */ 3429 drm_modeset_lock_all(dev); 3430 drm_connector_list_iter_begin(dev, &iter); 3431 drm_for_each_connector_iter(connector, &iter) 3432 drm_helper_connector_dpms(connector, 3433 DRM_MODE_DPMS_OFF); 3434 drm_connector_list_iter_end(&iter); 3435 drm_modeset_unlock_all(dev); 3436 /* unpin the front buffers and cursors */ 3437 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3438 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3439 struct drm_framebuffer *fb = crtc->primary->fb; 3440 struct amdgpu_bo *robj; 3441 3442 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3443 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3444 r = amdgpu_bo_reserve(aobj, true); 3445 if (r == 0) { 3446 amdgpu_bo_unpin(aobj); 3447 amdgpu_bo_unreserve(aobj); 3448 } 3449 } 3450 3451 if (fb == NULL || fb->obj[0] == NULL) { 3452 continue; 3453 } 3454 robj = gem_to_amdgpu_bo(fb->obj[0]); 3455 /* don't unpin kernel fb objects */ 3456 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 3457 r = amdgpu_bo_reserve(robj, true); 3458 if (r == 0) { 3459 amdgpu_bo_unpin(robj); 3460 amdgpu_bo_unreserve(robj); 3461 } 3462 } 3463 } 3464 } 3465 3466 amdgpu_ras_suspend(adev); 3467 3468 r = amdgpu_device_ip_suspend_phase1(adev); 3469 3470 amdgpu_amdkfd_suspend(adev, !fbcon); 3471 3472 /* evict vram memory */ 3473 amdgpu_bo_evict_vram(adev); 3474 3475 amdgpu_fence_driver_suspend(adev); 3476 3477 r = amdgpu_device_ip_suspend_phase2(adev); 3478 3479 /* evict remaining vram memory 3480 * This second call to evict vram is to evict the gart page table 3481 * using the CPU. 3482 */ 3483 amdgpu_bo_evict_vram(adev); 3484 3485 return 0; 3486 } 3487 3488 /** 3489 * amdgpu_device_resume - initiate device resume 3490 * 3491 * @dev: drm dev pointer 3492 * @resume: resume state 3493 * @fbcon : notify the fbdev of resume 3494 * 3495 * Bring the hw back to operating state (all asics). 3496 * Returns 0 for success or an error on failure. 3497 * Called at driver resume. 
3498 */ 3499 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 3500 { 3501 struct drm_connector *connector; 3502 struct drm_connector_list_iter iter; 3503 struct amdgpu_device *adev = dev->dev_private; 3504 struct drm_crtc *crtc; 3505 int r = 0; 3506 3507 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3508 return 0; 3509 3510 /* post card */ 3511 if (amdgpu_device_need_post(adev)) { 3512 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3513 if (r) 3514 DRM_ERROR("amdgpu asic init failed\n"); 3515 } 3516 3517 r = amdgpu_device_ip_resume(adev); 3518 if (r) { 3519 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 3520 return r; 3521 } 3522 amdgpu_fence_driver_resume(adev); 3523 3524 3525 r = amdgpu_device_ip_late_init(adev); 3526 if (r) 3527 return r; 3528 3529 queue_delayed_work(system_wq, &adev->delayed_init_work, 3530 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3531 3532 if (!amdgpu_device_has_dc_support(adev)) { 3533 /* pin cursors */ 3534 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3535 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3536 3537 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3538 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3539 r = amdgpu_bo_reserve(aobj, true); 3540 if (r == 0) { 3541 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 3542 if (r != 0) 3543 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 3544 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 3545 amdgpu_bo_unreserve(aobj); 3546 } 3547 } 3548 } 3549 } 3550 r = amdgpu_amdkfd_resume(adev, !fbcon); 3551 if (r) 3552 return r; 3553 3554 /* Make sure IB tests flushed */ 3555 flush_delayed_work(&adev->delayed_init_work); 3556 3557 /* blat the mode back in */ 3558 if (fbcon) { 3559 if (!amdgpu_device_has_dc_support(adev)) { 3560 /* pre DCE11 */ 3561 drm_helper_resume_force_mode(dev); 3562 3563 /* turn on display hw */ 3564 drm_modeset_lock_all(dev); 3565 3566 drm_connector_list_iter_begin(dev, &iter); 3567 drm_for_each_connector_iter(connector, &iter) 3568 drm_helper_connector_dpms(connector, 3569 DRM_MODE_DPMS_ON); 3570 drm_connector_list_iter_end(&iter); 3571 3572 drm_modeset_unlock_all(dev); 3573 } 3574 amdgpu_fbdev_set_suspend(adev, 0); 3575 } 3576 3577 drm_kms_helper_poll_enable(dev); 3578 3579 amdgpu_ras_resume(adev); 3580 3581 /* 3582 * Most of the connector probing functions try to acquire runtime pm 3583 * refs to ensure that the GPU is powered on when connector polling is 3584 * performed. Since we're calling this from a runtime PM callback, 3585 * trying to acquire rpm refs will cause us to deadlock. 3586 * 3587 * Since we're guaranteed to be holding the rpm lock, it's safe to 3588 * temporarily disable the rpm helpers so this doesn't deadlock us. 3589 */ 3590 #ifdef CONFIG_PM 3591 dev->dev->power.disable_depth++; 3592 #endif 3593 if (!amdgpu_device_has_dc_support(adev)) 3594 drm_helper_hpd_irq_event(dev); 3595 else 3596 drm_kms_helper_hotplug_event(dev); 3597 #ifdef CONFIG_PM 3598 dev->dev->power.disable_depth--; 3599 #endif 3600 adev->in_suspend = false; 3601 3602 return 0; 3603 } 3604 3605 /** 3606 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 3607 * 3608 * @adev: amdgpu_device pointer 3609 * 3610 * The list of all the hardware IPs that make up the asic is walked and 3611 * the check_soft_reset callbacks are run. check_soft_reset determines 3612 * if the asic is still hung or not. 3613 * Returns true if any of the IPs are still in a hung state, false if not. 
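 *
 * On SR-IOV virtual functions, or when the ASIC itself reports that it needs
 * a full reset, this returns true without polling the individual IP blocks.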
3614 */ 3615 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 3616 { 3617 int i; 3618 bool asic_hang = false; 3619 3620 if (amdgpu_sriov_vf(adev)) 3621 return true; 3622 3623 if (amdgpu_asic_need_full_reset(adev)) 3624 return true; 3625 3626 for (i = 0; i < adev->num_ip_blocks; i++) { 3627 if (!adev->ip_blocks[i].status.valid) 3628 continue; 3629 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 3630 adev->ip_blocks[i].status.hang = 3631 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 3632 if (adev->ip_blocks[i].status.hang) { 3633 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 3634 asic_hang = true; 3635 } 3636 } 3637 return asic_hang; 3638 } 3639 3640 /** 3641 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 3642 * 3643 * @adev: amdgpu_device pointer 3644 * 3645 * The list of all the hardware IPs that make up the asic is walked and the 3646 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 3647 * handles any IP specific hardware or software state changes that are 3648 * necessary for a soft reset to succeed. 3649 * Returns 0 on success, negative error code on failure. 3650 */ 3651 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 3652 { 3653 int i, r = 0; 3654 3655 for (i = 0; i < adev->num_ip_blocks; i++) { 3656 if (!adev->ip_blocks[i].status.valid) 3657 continue; 3658 if (adev->ip_blocks[i].status.hang && 3659 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 3660 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 3661 if (r) 3662 return r; 3663 } 3664 } 3665 3666 return 0; 3667 } 3668 3669 /** 3670 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3671 * 3672 * @adev: amdgpu_device pointer 3673 * 3674 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3675 * reset is necessary to recover. 3676 * Returns true if a full asic reset is required, false if not. 3677 */ 3678 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3679 { 3680 int i; 3681 3682 if (amdgpu_asic_need_full_reset(adev)) 3683 return true; 3684 3685 for (i = 0; i < adev->num_ip_blocks; i++) { 3686 if (!adev->ip_blocks[i].status.valid) 3687 continue; 3688 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3689 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3690 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3691 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3692 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3693 if (adev->ip_blocks[i].status.hang) { 3694 DRM_INFO("Some block need full reset!\n"); 3695 return true; 3696 } 3697 } 3698 } 3699 return false; 3700 } 3701 3702 /** 3703 * amdgpu_device_ip_soft_reset - do a soft reset 3704 * 3705 * @adev: amdgpu_device pointer 3706 * 3707 * The list of all the hardware IPs that make up the asic is walked and the 3708 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3709 * IP specific hardware or software state changes that are necessary to soft 3710 * reset the IP. 3711 * Returns 0 on success, negative error code on failure. 
3712 */ 3713 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3714 { 3715 int i, r = 0; 3716 3717 for (i = 0; i < adev->num_ip_blocks; i++) { 3718 if (!adev->ip_blocks[i].status.valid) 3719 continue; 3720 if (adev->ip_blocks[i].status.hang && 3721 adev->ip_blocks[i].version->funcs->soft_reset) { 3722 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3723 if (r) 3724 return r; 3725 } 3726 } 3727 3728 return 0; 3729 } 3730 3731 /** 3732 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3733 * 3734 * @adev: amdgpu_device pointer 3735 * 3736 * The list of all the hardware IPs that make up the asic is walked and the 3737 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3738 * handles any IP specific hardware or software state changes that are 3739 * necessary after the IP has been soft reset. 3740 * Returns 0 on success, negative error code on failure. 3741 */ 3742 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3743 { 3744 int i, r = 0; 3745 3746 for (i = 0; i < adev->num_ip_blocks; i++) { 3747 if (!adev->ip_blocks[i].status.valid) 3748 continue; 3749 if (adev->ip_blocks[i].status.hang && 3750 adev->ip_blocks[i].version->funcs->post_soft_reset) 3751 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3752 if (r) 3753 return r; 3754 } 3755 3756 return 0; 3757 } 3758 3759 /** 3760 * amdgpu_device_recover_vram - Recover some VRAM contents 3761 * 3762 * @adev: amdgpu_device pointer 3763 * 3764 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3765 * restore things like GPUVM page tables after a GPU reset where 3766 * the contents of VRAM might be lost. 3767 * 3768 * Returns: 3769 * 0 on success, negative error code on failure. 3770 */ 3771 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3772 { 3773 struct dma_fence *fence = NULL, *next = NULL; 3774 struct amdgpu_bo *shadow; 3775 long r = 1, tmo; 3776 3777 if (amdgpu_sriov_runtime(adev)) 3778 tmo = msecs_to_jiffies(8000); 3779 else 3780 tmo = msecs_to_jiffies(100); 3781 3782 DRM_INFO("recover vram bo from shadow start\n"); 3783 mutex_lock(&adev->shadow_list_lock); 3784 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3785 3786 /* No need to recover an evicted BO */ 3787 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3788 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || 3789 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3790 continue; 3791 3792 r = amdgpu_bo_restore_shadow(shadow, &next); 3793 if (r) 3794 break; 3795 3796 if (fence) { 3797 tmo = dma_fence_wait_timeout(fence, false, tmo); 3798 dma_fence_put(fence); 3799 fence = next; 3800 if (tmo == 0) { 3801 r = -ETIMEDOUT; 3802 break; 3803 } else if (tmo < 0) { 3804 r = tmo; 3805 break; 3806 } 3807 } else { 3808 fence = next; 3809 } 3810 } 3811 mutex_unlock(&adev->shadow_list_lock); 3812 3813 if (fence) 3814 tmo = dma_fence_wait_timeout(fence, false, tmo); 3815 dma_fence_put(fence); 3816 3817 if (r < 0 || tmo <= 0) { 3818 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 3819 return -EIO; 3820 } 3821 3822 DRM_INFO("recover vram bo from shadow done\n"); 3823 return 0; 3824 } 3825 3826 3827 /** 3828 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3829 * 3830 * @adev: amdgpu device pointer 3831 * @from_hypervisor: request from hypervisor 3832 * 3833 * do VF FLR and reinitialize Asic 3834 * return 0 means succeeded otherwise failed 3835 */ 3836 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 
3837 bool from_hypervisor) 3838 { 3839 int r; 3840 3841 if (from_hypervisor) 3842 r = amdgpu_virt_request_full_gpu(adev, true); 3843 else 3844 r = amdgpu_virt_reset_gpu(adev); 3845 if (r) 3846 return r; 3847 3848 amdgpu_amdkfd_pre_reset(adev); 3849 3850 /* Resume IP prior to SMC */ 3851 r = amdgpu_device_ip_reinit_early_sriov(adev); 3852 if (r) 3853 goto error; 3854 3855 amdgpu_virt_init_data_exchange(adev); 3856 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3857 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3858 3859 r = amdgpu_device_fw_loading(adev); 3860 if (r) 3861 return r; 3862 3863 /* now we are okay to resume SMC/CP/SDMA */ 3864 r = amdgpu_device_ip_reinit_late_sriov(adev); 3865 if (r) 3866 goto error; 3867 3868 amdgpu_irq_gpu_reset_resume_helper(adev); 3869 r = amdgpu_ib_ring_tests(adev); 3870 amdgpu_amdkfd_post_reset(adev); 3871 3872 error: 3873 amdgpu_virt_release_full_gpu(adev, true); 3874 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3875 amdgpu_inc_vram_lost(adev); 3876 r = amdgpu_device_recover_vram(adev); 3877 } 3878 3879 return r; 3880 } 3881 3882 /** 3883 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3884 * 3885 * @adev: amdgpu device pointer 3886 * 3887 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3888 * a hung GPU. 3889 */ 3890 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 3891 { 3892 if (!amdgpu_device_ip_check_soft_reset(adev)) { 3893 DRM_INFO("Timeout, but no hardware hang detected.\n"); 3894 return false; 3895 } 3896 3897 if (amdgpu_gpu_recovery == 0) 3898 goto disabled; 3899 3900 if (amdgpu_sriov_vf(adev)) 3901 return true; 3902 3903 if (amdgpu_gpu_recovery == -1) { 3904 switch (adev->asic_type) { 3905 case CHIP_BONAIRE: 3906 case CHIP_HAWAII: 3907 case CHIP_TOPAZ: 3908 case CHIP_TONGA: 3909 case CHIP_FIJI: 3910 case CHIP_POLARIS10: 3911 case CHIP_POLARIS11: 3912 case CHIP_POLARIS12: 3913 case CHIP_VEGAM: 3914 case CHIP_VEGA20: 3915 case CHIP_VEGA10: 3916 case CHIP_VEGA12: 3917 case CHIP_RAVEN: 3918 case CHIP_ARCTURUS: 3919 case CHIP_RENOIR: 3920 case CHIP_NAVI10: 3921 case CHIP_NAVI14: 3922 case CHIP_NAVI12: 3923 break; 3924 default: 3925 goto disabled; 3926 } 3927 } 3928 3929 return true; 3930 3931 disabled: 3932 DRM_INFO("GPU recovery disabled.\n"); 3933 return false; 3934 } 3935 3936 3937 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 3938 struct amdgpu_job *job, 3939 bool *need_full_reset_arg) 3940 { 3941 int i, r = 0; 3942 bool need_full_reset = *need_full_reset_arg; 3943 3944 amdgpu_debugfs_wait_dump(adev); 3945 3946 /* block all schedulers and reset given job's ring */ 3947 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3948 struct amdgpu_ring *ring = adev->rings[i]; 3949 3950 if (!ring || !ring->sched.thread) 3951 continue; 3952 3953 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3954 amdgpu_fence_driver_force_completion(ring); 3955 } 3956 3957 if(job) 3958 drm_sched_increase_karma(&job->base); 3959 3960 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 3961 if (!amdgpu_sriov_vf(adev)) { 3962 3963 if (!need_full_reset) 3964 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3965 3966 if (!need_full_reset) { 3967 amdgpu_device_ip_pre_soft_reset(adev); 3968 r = amdgpu_device_ip_soft_reset(adev); 3969 amdgpu_device_ip_post_soft_reset(adev); 3970 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3971 DRM_INFO("soft reset failed, will fallback to full 
reset!\n"); 3972 need_full_reset = true; 3973 } 3974 } 3975 3976 if (need_full_reset) 3977 r = amdgpu_device_ip_suspend(adev); 3978 3979 *need_full_reset_arg = need_full_reset; 3980 } 3981 3982 return r; 3983 } 3984 3985 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, 3986 struct list_head *device_list_handle, 3987 bool *need_full_reset_arg) 3988 { 3989 struct amdgpu_device *tmp_adev = NULL; 3990 bool need_full_reset = *need_full_reset_arg, vram_lost = false; 3991 int r = 0; 3992 3993 /* 3994 * ASIC reset has to be done on all HGMI hive nodes ASAP 3995 * to allow proper links negotiation in FW (within 1 sec) 3996 */ 3997 if (need_full_reset) { 3998 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3999 /* For XGMI run all resets in parallel to speed up the process */ 4000 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 4001 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 4002 r = -EALREADY; 4003 } else 4004 r = amdgpu_asic_reset(tmp_adev); 4005 4006 if (r) { 4007 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", 4008 r, tmp_adev->ddev->unique); 4009 break; 4010 } 4011 } 4012 4013 /* For XGMI wait for all resets to complete before proceed */ 4014 if (!r) { 4015 list_for_each_entry(tmp_adev, device_list_handle, 4016 gmc.xgmi.head) { 4017 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 4018 flush_work(&tmp_adev->xgmi_reset_work); 4019 r = tmp_adev->asic_reset_res; 4020 if (r) 4021 break; 4022 } 4023 } 4024 } 4025 } 4026 4027 if (!r && amdgpu_ras_intr_triggered()) { 4028 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4029 if (tmp_adev->mmhub.funcs && 4030 tmp_adev->mmhub.funcs->reset_ras_error_count) 4031 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev); 4032 } 4033 4034 amdgpu_ras_intr_cleared(); 4035 } 4036 4037 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4038 if (need_full_reset) { 4039 /* post card */ 4040 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) 4041 DRM_WARN("asic atom init failed!"); 4042 4043 if (!r) { 4044 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 4045 r = amdgpu_device_ip_resume_phase1(tmp_adev); 4046 if (r) 4047 goto out; 4048 4049 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 4050 if (vram_lost) { 4051 DRM_INFO("VRAM is lost due to GPU reset!\n"); 4052 amdgpu_inc_vram_lost(tmp_adev); 4053 } 4054 4055 r = amdgpu_gtt_mgr_recover( 4056 &tmp_adev->mman.bdev.man[TTM_PL_TT]); 4057 if (r) 4058 goto out; 4059 4060 r = amdgpu_device_fw_loading(tmp_adev); 4061 if (r) 4062 return r; 4063 4064 r = amdgpu_device_ip_resume_phase2(tmp_adev); 4065 if (r) 4066 goto out; 4067 4068 if (vram_lost) 4069 amdgpu_device_fill_reset_magic(tmp_adev); 4070 4071 /* 4072 * Add this ASIC as tracked as reset was already 4073 * complete successfully. 4074 */ 4075 amdgpu_register_gpu_instance(tmp_adev); 4076 4077 r = amdgpu_device_ip_late_init(tmp_adev); 4078 if (r) 4079 goto out; 4080 4081 amdgpu_fbdev_set_suspend(tmp_adev, 0); 4082 4083 /* must succeed. 
*/ 4084 amdgpu_ras_resume(tmp_adev); 4085 4086 /* Update PSP FW topology after reset */ 4087 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) 4088 r = amdgpu_xgmi_update_topology(hive, tmp_adev); 4089 } 4090 } 4091 4092 4093 out: 4094 if (!r) { 4095 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 4096 r = amdgpu_ib_ring_tests(tmp_adev); 4097 if (r) { 4098 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 4099 r = amdgpu_device_ip_suspend(tmp_adev); 4100 need_full_reset = true; 4101 r = -EAGAIN; 4102 goto end; 4103 } 4104 } 4105 4106 if (!r) 4107 r = amdgpu_device_recover_vram(tmp_adev); 4108 else 4109 tmp_adev->asic_reset_res = r; 4110 } 4111 4112 end: 4113 *need_full_reset_arg = need_full_reset; 4114 return r; 4115 } 4116 4117 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) 4118 { 4119 if (trylock) { 4120 if (!mutex_trylock(&adev->lock_reset)) 4121 return false; 4122 } else 4123 mutex_lock(&adev->lock_reset); 4124 4125 atomic_inc(&adev->gpu_reset_counter); 4126 adev->in_gpu_reset = true; 4127 switch (amdgpu_asic_reset_method(adev)) { 4128 case AMD_RESET_METHOD_MODE1: 4129 adev->mp1_state = PP_MP1_STATE_SHUTDOWN; 4130 break; 4131 case AMD_RESET_METHOD_MODE2: 4132 adev->mp1_state = PP_MP1_STATE_RESET; 4133 break; 4134 default: 4135 adev->mp1_state = PP_MP1_STATE_NONE; 4136 break; 4137 } 4138 4139 return true; 4140 } 4141 4142 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) 4143 { 4144 amdgpu_vf_error_trans_all(adev); 4145 adev->mp1_state = PP_MP1_STATE_NONE; 4146 adev->in_gpu_reset = false; 4147 mutex_unlock(&adev->lock_reset); 4148 } 4149 4150 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) 4151 { 4152 struct pci_dev *p = NULL; 4153 4154 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4155 adev->pdev->bus->number, 1); 4156 if (p) { 4157 pm_runtime_enable(&(p->dev)); 4158 pm_runtime_resume(&(p->dev)); 4159 } 4160 } 4161 4162 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 4163 { 4164 enum amd_reset_method reset_method; 4165 struct pci_dev *p = NULL; 4166 u64 expires; 4167 4168 /* 4169 * For now, only BACO and mode1 reset are confirmed 4170 * to suffer the audio issue without proper suspended. 4171 */ 4172 reset_method = amdgpu_asic_reset_method(adev); 4173 if ((reset_method != AMD_RESET_METHOD_BACO) && 4174 (reset_method != AMD_RESET_METHOD_MODE1)) 4175 return -EINVAL; 4176 4177 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4178 adev->pdev->bus->number, 1); 4179 if (!p) 4180 return -ENODEV; 4181 4182 expires = pm_runtime_autosuspend_expiration(&(p->dev)); 4183 if (!expires) 4184 /* 4185 * If we cannot get the audio device autosuspend delay, 4186 * a fixed 4S interval will be used. Considering 3S is 4187 * the audio controller default autosuspend delay setting. 4188 * 4S used here is guaranteed to cover that. 4189 */ 4190 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 4191 4192 while (!pm_runtime_status_suspended(&(p->dev))) { 4193 if (!pm_runtime_suspend(&(p->dev))) 4194 break; 4195 4196 if (expires < ktime_get_mono_fast_ns()) { 4197 dev_warn(adev->dev, "failed to suspend display audio\n"); 4198 /* TODO: abort the succeeding gpu reset? 
*/
4199 return -ETIMEDOUT;
4200 }
4201 }
4202
4203 pm_runtime_disable(&(p->dev));
4204
4205 return 0;
4206 }
4207
4208 /**
4209 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4210 *
4211 * @adev: amdgpu device pointer
4212 * @job: which job triggered the hang
4213 *
4214 * Attempt to reset the GPU if it has hung (all ASICs).
4215 * Attempt a soft reset, or a full reset, and reinitialize the ASIC.
4216 * Returns 0 for success or an error on failure.
4217 */
4218
4219 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4220 struct amdgpu_job *job)
4221 {
4222 struct list_head device_list, *device_list_handle = NULL;
4223 bool need_full_reset = false;
4224 bool job_signaled = false;
4225 struct amdgpu_hive_info *hive = NULL;
4226 struct amdgpu_device *tmp_adev = NULL;
4227 int i, r = 0;
4228 bool in_ras_intr = amdgpu_ras_intr_triggered();
4229 bool use_baco =
4230 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4231 true : false;
4232 bool audio_suspended = false;
4233
4234 /*
4235 * Flush RAM to disk so that after reboot
4236 * the user can read the log and see why the system rebooted.
4237 */
4238 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4239
4240 DRM_WARN("Emergency reboot.");
4241
4242 ksys_sync_helper();
4243 emergency_restart();
4244 }
4245
4246 dev_info(adev->dev, "GPU %s begin!\n",
4247 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
4248
4249 /*
4250 * Here we trylock to avoid a chain of resets executing while this
4251 * timeout handler is running, triggered either by jobs on different
4252 * adevs in the XGMI hive or by jobs on different schedulers for the
4253 * same device. We always reset all schedulers for a device and all
4254 * devices for an XGMI hive, so that should take care of them too.
4255 */
4256 hive = amdgpu_get_xgmi_hive(adev, true);
4257 if (hive && !mutex_trylock(&hive->reset_lock)) {
4258 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4259 job ? job->base.id : -1, hive->hive_id);
4260 mutex_unlock(&hive->hive_lock);
4261 return 0;
4262 }
4263
4264 /*
4265 * Build the list of devices to reset.
4266 * In XGMI hive mode, re-sort the device list so that adev is in
4267 * the first position.
4268 */
4269 INIT_LIST_HEAD(&device_list);
4270 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4271 if (!hive)
4272 return -ENODEV;
4273 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4274 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4275 device_list_handle = &hive->device_list;
4276 } else {
4277 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4278 device_list_handle = &device_list;
4279 }
4280
4281 /* block all schedulers and reset given job's ring */
4282 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4283 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4284 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4285 job ? job->base.id : -1);
4286 mutex_unlock(&hive->hive_lock);
4287 return 0;
4288 }
4289
4290 /*
4291 * Try to put the audio codec into the suspend state
4292 * before the gpu reset starts.
4293 *
4294 * The power domain of the graphics device is shared
4295 * with the AZ (audio) power domain. Without this,
4296 * we may change the audio hardware from behind
4297 * the audio driver's back. That will trigger
4298 * some audio codec errors.
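 *
 * (The codec in question is PCI function 1 on the GPU's own bus/slot,
 * which is what amdgpu_device_suspend_display_audio() above looks up via
 * pci_get_domain_bus_and_slot().)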
4299 */
4300 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4301 audio_suspended = true;
4302
4303 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4304
4305 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4306
4307 if (!amdgpu_sriov_vf(tmp_adev))
4308 amdgpu_amdkfd_pre_reset(tmp_adev);
4309
4310 /*
4311 * Mark the ASICs to be reset as untracked first,
4312 * and add them back after the reset completes.
4313 */
4314 amdgpu_unregister_gpu_instance(tmp_adev);
4315
4316 amdgpu_fbdev_set_suspend(tmp_adev, 1);
4317
4318 /* disable ras on ALL IPs */
4319 if (!(in_ras_intr && !use_baco) &&
4320 amdgpu_device_ip_need_full_reset(tmp_adev))
4321 amdgpu_ras_suspend(tmp_adev);
4322
4323 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4324 struct amdgpu_ring *ring = tmp_adev->rings[i];
4325
4326 if (!ring || !ring->sched.thread)
4327 continue;
4328
4329 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4330
4331 if (in_ras_intr && !use_baco)
4332 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4333 }
4334 }
4335
4336 if (in_ras_intr && !use_baco)
4337 goto skip_sched_resume;
4338
4339 /*
4340 * Must check the guilty signal here since after this point all old
4341 * HW fences are force signaled.
4342 *
4343 * job->base holds a reference to the parent fence.
4344 */
4345 if (job && job->base.s_fence->parent &&
4346 dma_fence_is_signaled(job->base.s_fence->parent)) {
4347 job_signaled = true;
4348 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4349 goto skip_hw_reset;
4350 }
4351
4352 retry: /* Rest of adevs pre asic reset from XGMI hive. */
4353 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4354 r = amdgpu_device_pre_asic_reset(tmp_adev,
4355 NULL,
4356 &need_full_reset);
4357 /* TODO: should we stop here on error? */
4358 if (r) {
4359 DRM_ERROR("GPU pre asic reset failed with err %d for drm dev %s ",
4360 r, tmp_adev->ddev->unique);
4361 tmp_adev->asic_reset_res = r;
4362 }
4363 }
4364
4365 /* Actual ASIC resets if needed. */
4366 /* TODO: implement XGMI hive reset logic for SRIOV */
4367 if (amdgpu_sriov_vf(adev)) {
4368 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4369 if (r)
4370 adev->asic_reset_res = r;
4371 } else {
4372 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4373 if (r && r == -EAGAIN)
4374 goto retry;
4375 }
4376
4377 skip_hw_reset:
4378
4379 /* Post ASIC reset for all devs. */
4380 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4381
4382 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4383 struct amdgpu_ring *ring = tmp_adev->rings[i];
4384
4385 if (!ring || !ring->sched.thread)
4386 continue;
4387
4388 /* No point in resubmitting jobs if we didn't HW reset */
4389 if (!tmp_adev->asic_reset_res && !job_signaled)
4390 drm_sched_resubmit_jobs(&ring->sched);
4391
4392 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4393 }
4394
4395 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4396 drm_helper_resume_force_mode(tmp_adev->ddev);
4397 }
4398
4399 tmp_adev->asic_reset_res = 0;
4400
4401 if (r) {
4402 /* bad news, how do we tell userspace about the failure?
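 * (At the time of writing the failure is only logged and reflected in
 * gpu_reset_counter; user space can infer it through the amdgpu_ctx
 * reset-status query, which compares a context's snapshot of that
 * counter against the current value.)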
*/ 4403 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); 4404 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 4405 } else { 4406 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 4407 } 4408 } 4409 4410 skip_sched_resume: 4411 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 4412 /*unlock kfd: SRIOV would do it separately */ 4413 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) 4414 amdgpu_amdkfd_post_reset(tmp_adev); 4415 if (audio_suspended) 4416 amdgpu_device_resume_display_audio(tmp_adev); 4417 amdgpu_device_unlock_adev(tmp_adev); 4418 } 4419 4420 if (hive) { 4421 mutex_unlock(&hive->reset_lock); 4422 mutex_unlock(&hive->hive_lock); 4423 } 4424 4425 if (r) 4426 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 4427 return r; 4428 } 4429 4430 /** 4431 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 4432 * 4433 * @adev: amdgpu_device pointer 4434 * 4435 * Fetchs and stores in the driver the PCIE capabilities (gen speed 4436 * and lanes) of the slot the device is in. Handles APUs and 4437 * virtualized environments where PCIE config space may not be available. 4438 */ 4439 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 4440 { 4441 struct pci_dev *pdev; 4442 enum pci_bus_speed speed_cap, platform_speed_cap; 4443 enum pcie_link_width platform_link_width; 4444 4445 if (amdgpu_pcie_gen_cap) 4446 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 4447 4448 if (amdgpu_pcie_lane_cap) 4449 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 4450 4451 /* covers APUs as well */ 4452 if (pci_is_root_bus(adev->pdev->bus)) { 4453 if (adev->pm.pcie_gen_mask == 0) 4454 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 4455 if (adev->pm.pcie_mlw_mask == 0) 4456 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 4457 return; 4458 } 4459 4460 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 4461 return; 4462 4463 pcie_bandwidth_available(adev->pdev, NULL, 4464 &platform_speed_cap, &platform_link_width); 4465 4466 if (adev->pm.pcie_gen_mask == 0) { 4467 /* asic caps */ 4468 pdev = adev->pdev; 4469 speed_cap = pcie_get_speed_cap(pdev); 4470 if (speed_cap == PCI_SPEED_UNKNOWN) { 4471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4474 } else { 4475 if (speed_cap == PCIE_SPEED_16_0GT) 4476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4478 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4479 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 4480 else if (speed_cap == PCIE_SPEED_8_0GT) 4481 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4482 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4483 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4484 else if (speed_cap == PCIE_SPEED_5_0GT) 4485 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4486 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 4487 else 4488 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 4489 } 4490 /* platform caps */ 4491 if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 4492 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4493 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4494 } else { 4495 if (platform_speed_cap == PCIE_SPEED_16_0GT) 4496 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4497 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4498 
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4499 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 4500 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 4501 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4502 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4503 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 4504 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 4505 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4506 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4507 else 4508 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 4509 4510 } 4511 } 4512 if (adev->pm.pcie_mlw_mask == 0) { 4513 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 4514 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 4515 } else { 4516 switch (platform_link_width) { 4517 case PCIE_LNK_X32: 4518 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 4519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4525 break; 4526 case PCIE_LNK_X16: 4527 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4532 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4533 break; 4534 case PCIE_LNK_X12: 4535 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4540 break; 4541 case PCIE_LNK_X8: 4542 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4543 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4545 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4546 break; 4547 case PCIE_LNK_X4: 4548 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4550 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4551 break; 4552 case PCIE_LNK_X2: 4553 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4554 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4555 break; 4556 case PCIE_LNK_X1: 4557 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 4558 break; 4559 default: 4560 break; 4561 } 4562 } 4563 } 4564 } 4565 4566 int amdgpu_device_baco_enter(struct drm_device *dev) 4567 { 4568 struct amdgpu_device *adev = dev->dev_private; 4569 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4570 4571 if (!amdgpu_device_supports_baco(adev->ddev)) 4572 return -ENOTSUPP; 4573 4574 if (ras && ras->supported) 4575 adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 4576 4577 return amdgpu_dpm_baco_enter(adev); 4578 } 4579 4580 int amdgpu_device_baco_exit(struct drm_device *dev) 4581 { 4582 struct amdgpu_device *adev = dev->dev_private; 4583 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4584 int ret = 0; 4585 4586 if (!amdgpu_device_supports_baco(adev->ddev)) 4587 return -ENOTSUPP; 4588 4589 ret = amdgpu_dpm_baco_exit(adev); 4590 if (ret) 4591 return ret; 4592 4593 if (ras && ras->supported) 4594 adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 4595 4596 return 0; 4597 } 4598
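
/*
 * Illustrative usage sketch (not part of the driver): BACO entry/exit is
 * normally driven from the runtime-PM suspend/resume path rather than
 * called directly. A caller holding the drm_device would do roughly:
 *
 *	r = amdgpu_device_baco_enter(drm_dev);
 *	if (r)
 *		return r;	(e.g. -ENOTSUPP on parts without BACO)
 *	...device sits in BACO until it is woken up...
 *	r = amdgpu_device_baco_exit(drm_dev);
 *
 * Both helpers bail out early when amdgpu_device_supports_baco() reports
 * no support, so callers need a fallback suspend/reset method.
 */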