/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}
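
/*
 * Illustrative sketch, not part of the driver: most callers do not use the
 * helpers above directly but go through the RREG32()/WREG32() style macros
 * in amdgpu.h, which wrap amdgpu_mm_rreg()/amdgpu_mm_wreg(). A typical
 * read-modify-write of a register, using a hypothetical offset REG_FOO and
 * field mask FOO_MASK, would look roughly like:
 *
 *	u32 tmp;
 *
 *	tmp = amdgpu_mm_rreg(adev, REG_FOO, 0);
 *	tmp &= ~FOO_MASK;
 *	tmp |= new_val & FOO_MASK;
 *	amdgpu_mm_wreg(adev, REG_FOO, tmp, 0);
 *
 * Passing AMDGPU_REGS_NO_KIQ in acc_flags bypasses the KIQ-based register
 * access used under SR-IOV at runtime.
 */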
/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
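
/*
 * Illustrative sketch, not part of the driver: ring code usually commits a
 * new write pointer to the hardware through the WDOORBELL32()/WDOORBELL64()
 * style macros in amdgpu.h, which wrap the helpers above, e.g. roughly:
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell(adev, ring->doorbell_index,
 *				    lower_32_bits(ring->wptr));
 *
 * ring->use_doorbell, ring->doorbell_index and ring->wptr are fields of
 * struct amdgpu_ring and are only shown here for illustration.
 */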
/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}
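
/*
 * Illustrative sketch, not part of the driver: the golden register arrays
 * consumed by amdgpu_device_program_register_sequence() above are flat lists
 * of {register offset, AND mask, OR mask} triplets. mmREG_A/mmREG_B below
 * are hypothetical offsets used only for illustration:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmREG_A, 0xffffffff, 0x00000100,
 *		mmREG_B, 0x0000ff00, 0x00003200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * An AND mask of 0xffffffff writes the OR value directly; anything else does
 * a read, clears the AND-masked bits and ORs in the new value.
 */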
/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}
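
/*
 * Illustrative sketch, not part of the driver: a typical writeback user
 * (e.g. fence or ring code) reserves a slot, derives the CPU and GPU views
 * of it from the index, and releases it on teardown, roughly:
 *
 *	u32 index;
 *	u64 wb_gpu_addr;
 *	volatile u32 *wb_cpu_ptr;
 *
 *	if (!amdgpu_device_wb_get(adev, &index)) {
 *		wb_gpu_addr = adev->wb.gpu_addr + (index * 4);
 *		wb_cpu_ptr = &adev->wb.wb[index];
 *		... have the GPU write status to wb_gpu_addr, poll *wb_cpu_ptr ...
 *		amdgpu_device_wb_free(adev, index);
 *	}
 *
 * The index returned by amdgpu_device_wb_get() is already a dword offset
 * into the writeback page, hence the "* 4" when computing the byte address.
 */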
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after
		 * a VM reboot some old SMC firmware still needs the driver to perform
		 * vPost, otherwise the GPU hangs. SMC firmware versions above 22.15
		 * do not have this flaw, so we force vPost for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}
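
/*
 * Illustrative numbers for the two checks above, not part of the driver:
 * with 4KB pages there are 12 offset bits, so amdgpu_vm_block_size = 9
 * means each last-level page table maps 2^(12 + 9) bytes = 2MB of the VM
 * space, and the remaining bits of the VM address (amdgpu_vm_size is given
 * in GB) are resolved through the page directory levels above it.
 */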
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ?
		true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block version is equal to or greater than the
 * requested version, 1 if it is smaller or the ip_block doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}
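
/*
 * Illustrative sketch, not part of the driver: the two helpers above are
 * used as a pair around a GPU reset, roughly:
 *
 *	amdgpu_device_fill_reset_magic(adev);	(done from late init, before any reset)
 *	... perform the ASIC reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *	if (vram_lost)
 *		... re-upload buffer contents that lived in VRAM ...
 */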
/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run. This is the late initialization
 * pass that enables clockgating, or the fini/suspend pass that disables it.
 * Returns 0 on success, negative error code on failure.
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	queue_delayed_work(system_wq, &adev->late_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	amdgpu_device_fill_reset_magic(adev);

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_amdkfd_device_fini(adev);

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* need to disable SMC first */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}


	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.sw)
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			amdgpu_ucode_free_bo(adev);
			amdgpu_free_static_csa(adev);
			amdgpu_device_wb_fini(adev);
			amdgpu_device_vram_scratch_fini(adev);
		}

		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.sw = false;
		adev->ip_blocks[i].status.valid = false;
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_fini)
			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
		adev->ip_blocks[i].status.late_initialized = false;
	}

	if (amdgpu_sriov_vf(adev))
(amdgpu_virt_release_full_gpu(adev, false)) 1918 DRM_ERROR("failed to release exclusive mode on fini\n"); 1919 1920 return 0; 1921 } 1922 1923 static int amdgpu_device_enable_mgpu_fan_boost(void) 1924 { 1925 struct amdgpu_gpu_instance *gpu_ins; 1926 struct amdgpu_device *adev; 1927 int i, ret = 0; 1928 1929 mutex_lock(&mgpu_info.mutex); 1930 1931 /* 1932 * MGPU fan boost feature should be enabled 1933 * only when there are two or more dGPUs in 1934 * the system 1935 */ 1936 if (mgpu_info.num_dgpu < 2) 1937 goto out; 1938 1939 for (i = 0; i < mgpu_info.num_dgpu; i++) { 1940 gpu_ins = &(mgpu_info.gpu_ins[i]); 1941 adev = gpu_ins->adev; 1942 if (!(adev->flags & AMD_IS_APU) && 1943 !gpu_ins->mgpu_fan_enabled && 1944 adev->powerplay.pp_funcs && 1945 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { 1946 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 1947 if (ret) 1948 break; 1949 1950 gpu_ins->mgpu_fan_enabled = 1; 1951 } 1952 } 1953 1954 out: 1955 mutex_unlock(&mgpu_info.mutex); 1956 1957 return ret; 1958 } 1959 1960 /** 1961 * amdgpu_device_ip_late_init_func_handler - work handler for ib test 1962 * 1963 * @work: work_struct. 1964 */ 1965 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) 1966 { 1967 struct amdgpu_device *adev = 1968 container_of(work, struct amdgpu_device, late_init_work.work); 1969 int r; 1970 1971 r = amdgpu_ib_ring_tests(adev); 1972 if (r) 1973 DRM_ERROR("ib ring test failed (%d).\n", r); 1974 1975 r = amdgpu_device_enable_mgpu_fan_boost(); 1976 if (r) 1977 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 1978 } 1979 1980 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 1981 { 1982 struct amdgpu_device *adev = 1983 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 1984 1985 mutex_lock(&adev->gfx.gfx_off_mutex); 1986 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 1987 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 1988 adev->gfx.gfx_off_state = true; 1989 } 1990 mutex_unlock(&adev->gfx.gfx_off_mutex); 1991 } 1992 1993 /** 1994 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 1995 * 1996 * @adev: amdgpu_device pointer 1997 * 1998 * Main suspend function for hardware IPs. The list of all the hardware 1999 * IPs that make up the asic is walked, clockgating is disabled and the 2000 * suspend callbacks are run. suspend puts the hardware and software state 2001 * in each IP into a state suitable for suspend. 2002 * Returns 0 on success, negative error code on failure. 2003 */ 2004 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2005 { 2006 int i, r; 2007 2008 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2009 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2010 2011 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2012 if (!adev->ip_blocks[i].status.valid) 2013 continue; 2014 /* displays are handled separately */ 2015 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2016 /* XXX handle errors */ 2017 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2018 /* XXX handle errors */ 2019 if (r) { 2020 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2021 adev->ip_blocks[i].version->funcs->name, r); 2022 } 2023 } 2024 } 2025 2026 return 0; 2027 } 2028 2029 /** 2030 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2031 * 2032 * @adev: amdgpu_device pointer 2033 * 2034 * Main suspend function for hardware IPs. 
The list of all the hardware 2035 * IPs that make up the asic is walked, clockgating is disabled and the 2036 * suspend callbacks are run. suspend puts the hardware and software state 2037 * in each IP into a state suitable for suspend. 2038 * Returns 0 on success, negative error code on failure. 2039 */ 2040 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2041 { 2042 int i, r; 2043 2044 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2045 if (!adev->ip_blocks[i].status.valid) 2046 continue; 2047 /* displays are handled in phase1 */ 2048 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2049 continue; 2050 /* XXX handle errors */ 2051 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2052 /* XXX handle errors */ 2053 if (r) { 2054 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2055 adev->ip_blocks[i].version->funcs->name, r); 2056 } 2057 } 2058 2059 return 0; 2060 } 2061 2062 /** 2063 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2064 * 2065 * @adev: amdgpu_device pointer 2066 * 2067 * Main suspend function for hardware IPs. The list of all the hardware 2068 * IPs that make up the asic is walked, clockgating is disabled and the 2069 * suspend callbacks are run. suspend puts the hardware and software state 2070 * in each IP into a state suitable for suspend. 2071 * Returns 0 on success, negative error code on failure. 2072 */ 2073 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2074 { 2075 int r; 2076 2077 if (amdgpu_sriov_vf(adev)) 2078 amdgpu_virt_request_full_gpu(adev, false); 2079 2080 r = amdgpu_device_ip_suspend_phase1(adev); 2081 if (r) 2082 return r; 2083 r = amdgpu_device_ip_suspend_phase2(adev); 2084 2085 if (amdgpu_sriov_vf(adev)) 2086 amdgpu_virt_release_full_gpu(adev, false); 2087 2088 return r; 2089 } 2090 2091 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2092 { 2093 int i, r; 2094 2095 static enum amd_ip_block_type ip_order[] = { 2096 AMD_IP_BLOCK_TYPE_GMC, 2097 AMD_IP_BLOCK_TYPE_COMMON, 2098 AMD_IP_BLOCK_TYPE_PSP, 2099 AMD_IP_BLOCK_TYPE_IH, 2100 }; 2101 2102 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2103 int j; 2104 struct amdgpu_ip_block *block; 2105 2106 for (j = 0; j < adev->num_ip_blocks; j++) { 2107 block = &adev->ip_blocks[j]; 2108 2109 if (block->version->type != ip_order[i] || 2110 !block->status.valid) 2111 continue; 2112 2113 r = block->version->funcs->hw_init(adev); 2114 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2115 if (r) 2116 return r; 2117 } 2118 } 2119 2120 return 0; 2121 } 2122 2123 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2124 { 2125 int i, r; 2126 2127 static enum amd_ip_block_type ip_order[] = { 2128 AMD_IP_BLOCK_TYPE_SMC, 2129 AMD_IP_BLOCK_TYPE_DCE, 2130 AMD_IP_BLOCK_TYPE_GFX, 2131 AMD_IP_BLOCK_TYPE_SDMA, 2132 AMD_IP_BLOCK_TYPE_UVD, 2133 AMD_IP_BLOCK_TYPE_VCE 2134 }; 2135 2136 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2137 int j; 2138 struct amdgpu_ip_block *block; 2139 2140 for (j = 0; j < adev->num_ip_blocks; j++) { 2141 block = &adev->ip_blocks[j]; 2142 2143 if (block->version->type != ip_order[i] || 2144 !block->status.valid) 2145 continue; 2146 2147 r = block->version->funcs->hw_init(adev); 2148 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2149 if (r) 2150 return r; 2151 } 2152 } 2153 2154 return 0; 2155 } 2156 2157 /** 2158 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2159 * 2160 * @adev: amdgpu_device pointer 2161 * 2162 
* First resume function for hardware IPs. The list of all the hardware 2163 * IPs that make up the asic is walked and the resume callbacks are run for 2164 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2165 * after a suspend and updates the software state as necessary. This 2166 * function is also used for restoring the GPU after a GPU reset. 2167 * Returns 0 on success, negative error code on failure. 2168 */ 2169 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 2170 { 2171 int i, r; 2172 2173 for (i = 0; i < adev->num_ip_blocks; i++) { 2174 if (!adev->ip_blocks[i].status.valid) 2175 continue; 2176 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2177 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2178 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2179 r = adev->ip_blocks[i].version->funcs->resume(adev); 2180 if (r) { 2181 DRM_ERROR("resume of IP block <%s> failed %d\n", 2182 adev->ip_blocks[i].version->funcs->name, r); 2183 return r; 2184 } 2185 } 2186 } 2187 2188 return 0; 2189 } 2190 2191 /** 2192 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 2193 * 2194 * @adev: amdgpu_device pointer 2195 * 2196 * Second resume function for hardware IPs. The list of all the hardware 2197 * IPs that make up the asic is walked and the resume callbacks are run for 2198 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 2199 * functional state after a suspend and updates the software state as 2200 * necessary. This function is also used for restoring the GPU after a GPU 2201 * reset. 2202 * Returns 0 on success, negative error code on failure. 2203 */ 2204 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2205 { 2206 int i, r; 2207 2208 for (i = 0; i < adev->num_ip_blocks; i++) { 2209 if (!adev->ip_blocks[i].status.valid) 2210 continue; 2211 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2212 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2213 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 2214 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 2215 continue; 2216 r = adev->ip_blocks[i].version->funcs->resume(adev); 2217 if (r) { 2218 DRM_ERROR("resume of IP block <%s> failed %d\n", 2219 adev->ip_blocks[i].version->funcs->name, r); 2220 return r; 2221 } 2222 } 2223 2224 return 0; 2225 } 2226 2227 /** 2228 * amdgpu_device_ip_resume - run resume for hardware IPs 2229 * 2230 * @adev: amdgpu_device pointer 2231 * 2232 * Main resume function for hardware IPs. The hardware IPs 2233 * are split into two resume functions because they are 2234 * also used in recovering from a GPU reset and some additional 2235 * steps need to be taken between them. In this case (S3/S4) they are 2236 * run sequentially. 2237 * Returns 0 on success, negative error code on failure. 2238 */ 2239 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2240 { 2241 int r; 2242 2243 r = amdgpu_device_ip_resume_phase1(adev); 2244 if (r) 2245 return r; 2246 2247 r = amdgpu_device_fw_loading(adev); 2248 if (r) 2249 return r; 2250 2251 r = amdgpu_device_ip_resume_phase2(adev); 2252 2253 return r; 2254 } 2255 2256 /** 2257 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2258 * 2259 * @adev: amdgpu_device pointer 2260 * 2261 * Query the VBIOS data tables to determine if the board supports SR-IOV.
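 *
 * On atomfirmware-based parts this checks
 * amdgpu_atomfirmware_gpu_supports_virtualization(), otherwise the legacy
 * atombios GPU virtualization table is consulted; when support is found the
 * AMDGPU_SRIOV_CAPS_SRIOV_VBIOS cap is set, and a VF error is recorded if a
 * VF is detected without an SR-IOV capable vBIOS.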
2262 */ 2263 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2264 { 2265 if (amdgpu_sriov_vf(adev)) { 2266 if (adev->is_atom_fw) { 2267 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2268 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2269 } else { 2270 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2271 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2272 } 2273 2274 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2275 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2276 } 2277 } 2278 2279 /** 2280 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2281 * 2282 * @asic_type: AMD asic type 2283 * 2284 * Check if there is DC (new modesetting infrastructure) support for an asic. 2285 * Returns true if DC has support, false if not. 2286 */ 2287 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2288 { 2289 switch (asic_type) { 2290 #if defined(CONFIG_DRM_AMD_DC) 2291 case CHIP_BONAIRE: 2292 case CHIP_KAVERI: 2293 case CHIP_KABINI: 2294 case CHIP_MULLINS: 2295 /* 2296 * We have systems in the wild with these ASICs that require 2297 * LVDS and VGA support which is not supported with DC. 2298 * 2299 * Fall back to the non-DC driver here by default so as not to 2300 * cause regressions. 2301 */ 2302 return amdgpu_dc > 0; 2303 case CHIP_HAWAII: 2304 case CHIP_CARRIZO: 2305 case CHIP_STONEY: 2306 case CHIP_POLARIS10: 2307 case CHIP_POLARIS11: 2308 case CHIP_POLARIS12: 2309 case CHIP_VEGAM: 2310 case CHIP_TONGA: 2311 case CHIP_FIJI: 2312 case CHIP_VEGA10: 2313 case CHIP_VEGA12: 2314 case CHIP_VEGA20: 2315 #if defined(CONFIG_DRM_AMD_DC_DCN1_0) 2316 case CHIP_RAVEN: 2317 #endif 2318 return amdgpu_dc != 0; 2319 #endif 2320 default: 2321 return false; 2322 } 2323 } 2324 2325 /** 2326 * amdgpu_device_has_dc_support - check if dc is supported 2327 * 2328 * @adev: amdgpu_device pointer 2329 * 2330 * Returns true for supported, false for not supported 2331 */ 2332 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2333 { 2334 if (amdgpu_sriov_vf(adev)) 2335 return false; 2336 2337 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2338 } 2339 2340 /** 2341 * amdgpu_device_init - initialize the driver 2342 * 2343 * @adev: amdgpu_device pointer 2344 * @ddev: drm dev pointer 2345 * @pdev: pci dev pointer 2346 * @flags: driver flags 2347 * 2348 * Initializes the driver info and hw (all asics). 2349 * Returns 0 for success or an error on failure. 2350 * Called at driver startup.
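 *
 * A rough sketch of the ordering performed below (see the code for the
 * details and error handling):
 *   - map the MMIO, doorbell and PCI I/O BARs
 *   - amdgpu_device_ip_early_init(adev)
 *   - read the vBIOS, amdgpu_atombios_init(adev), post the card if needed
 *   - amdgpu_fence_driver_init(adev), drm_mode_config_init(ddev)
 *   - amdgpu_device_ip_init(adev), then IB pool, fbdev and debugfs setup
 *   - amdgpu_device_ip_late_init(adev)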
2351 */ 2352 int amdgpu_device_init(struct amdgpu_device *adev, 2353 struct drm_device *ddev, 2354 struct pci_dev *pdev, 2355 uint32_t flags) 2356 { 2357 int r, i; 2358 bool runtime = false; 2359 u32 max_MBps; 2360 2361 adev->shutdown = false; 2362 adev->dev = &pdev->dev; 2363 adev->ddev = ddev; 2364 adev->pdev = pdev; 2365 adev->flags = flags; 2366 adev->asic_type = flags & AMD_ASIC_MASK; 2367 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2368 if (amdgpu_emu_mode == 1) 2369 adev->usec_timeout *= 2; 2370 adev->gmc.gart_size = 512 * 1024 * 1024; 2371 adev->accel_working = false; 2372 adev->num_rings = 0; 2373 adev->mman.buffer_funcs = NULL; 2374 adev->mman.buffer_funcs_ring = NULL; 2375 adev->vm_manager.vm_pte_funcs = NULL; 2376 adev->vm_manager.vm_pte_num_rqs = 0; 2377 adev->gmc.gmc_funcs = NULL; 2378 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2379 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2380 2381 adev->smc_rreg = &amdgpu_invalid_rreg; 2382 adev->smc_wreg = &amdgpu_invalid_wreg; 2383 adev->pcie_rreg = &amdgpu_invalid_rreg; 2384 adev->pcie_wreg = &amdgpu_invalid_wreg; 2385 adev->pciep_rreg = &amdgpu_invalid_rreg; 2386 adev->pciep_wreg = &amdgpu_invalid_wreg; 2387 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2388 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2389 adev->didt_rreg = &amdgpu_invalid_rreg; 2390 adev->didt_wreg = &amdgpu_invalid_wreg; 2391 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2392 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2393 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2394 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2395 2396 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2397 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2398 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2399 2400 /* mutex initialization are all done here so we 2401 * can recall function without having locking issues */ 2402 atomic_set(&adev->irq.ih.lock, 0); 2403 mutex_init(&adev->firmware.mutex); 2404 mutex_init(&adev->pm.mutex); 2405 mutex_init(&adev->gfx.gpu_clock_mutex); 2406 mutex_init(&adev->srbm_mutex); 2407 mutex_init(&adev->gfx.pipe_reserve_mutex); 2408 mutex_init(&adev->gfx.gfx_off_mutex); 2409 mutex_init(&adev->grbm_idx_mutex); 2410 mutex_init(&adev->mn_lock); 2411 mutex_init(&adev->virt.vf_errors.lock); 2412 hash_init(adev->mn_hash); 2413 mutex_init(&adev->lock_reset); 2414 2415 amdgpu_device_check_arguments(adev); 2416 2417 spin_lock_init(&adev->mmio_idx_lock); 2418 spin_lock_init(&adev->smc_idx_lock); 2419 spin_lock_init(&adev->pcie_idx_lock); 2420 spin_lock_init(&adev->uvd_ctx_idx_lock); 2421 spin_lock_init(&adev->didt_idx_lock); 2422 spin_lock_init(&adev->gc_cac_idx_lock); 2423 spin_lock_init(&adev->se_cac_idx_lock); 2424 spin_lock_init(&adev->audio_endpt_idx_lock); 2425 spin_lock_init(&adev->mm_stats.lock); 2426 2427 INIT_LIST_HEAD(&adev->shadow_list); 2428 mutex_init(&adev->shadow_list_lock); 2429 2430 INIT_LIST_HEAD(&adev->ring_lru_list); 2431 spin_lock_init(&adev->ring_lru_list_lock); 2432 2433 INIT_DELAYED_WORK(&adev->late_init_work, 2434 amdgpu_device_ip_late_init_func_handler); 2435 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2436 amdgpu_device_delay_enable_gfx_off); 2437 2438 adev->gfx.gfx_off_req_count = 1; 2439 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 2440 2441 /* Registers mapping */ 2442 /* TODO: block userspace mapping of io register */ 2443 if (adev->asic_type >= CHIP_BONAIRE) { 2444 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 2445 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 2446 } else { 2447 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 2448 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 2449 } 2450 2451 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 2452 if (adev->rmmio == NULL) { 2453 return -ENOMEM; 2454 } 2455 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2456 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2457 2458 /* doorbell bar mapping */ 2459 amdgpu_device_doorbell_init(adev); 2460 2461 /* io port mapping */ 2462 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2463 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 2464 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 2465 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 2466 break; 2467 } 2468 } 2469 if (adev->rio_mem == NULL) 2470 DRM_INFO("PCI I/O BAR is not found.\n"); 2471 2472 amdgpu_device_get_pcie_info(adev); 2473 2474 /* early init functions */ 2475 r = amdgpu_device_ip_early_init(adev); 2476 if (r) 2477 return r; 2478 2479 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 2480 /* this will fail for cards that aren't VGA class devices, just 2481 * ignore it */ 2482 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 2483 2484 if (amdgpu_device_is_px(ddev)) 2485 runtime = true; 2486 if (!pci_is_thunderbolt_attached(adev->pdev)) 2487 vga_switcheroo_register_client(adev->pdev, 2488 &amdgpu_switcheroo_ops, runtime); 2489 if (runtime) 2490 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2491 2492 if (amdgpu_emu_mode == 1) { 2493 /* post the asic on emulation mode */ 2494 emu_soc_asic_init(adev); 2495 goto fence_driver_init; 2496 } 2497 2498 /* Read BIOS */ 2499 if (!amdgpu_get_bios(adev)) { 2500 r = -EINVAL; 2501 goto failed; 2502 } 2503 2504 r = amdgpu_atombios_init(adev); 2505 if (r) { 2506 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2507 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2508 goto failed; 2509 } 2510 2511 /* detect if we are with an SRIOV vbios */ 2512 amdgpu_device_detect_sriov_bios(adev); 2513 2514 /* Post card if necessary */ 2515 if (amdgpu_device_need_post(adev)) { 2516 if (!adev->bios) { 2517 dev_err(adev->dev, "no vBIOS found\n"); 2518 r = -EINVAL; 2519 goto failed; 2520 } 2521 DRM_INFO("GPU posting now...\n"); 2522 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2523 if (r) { 2524 dev_err(adev->dev, "gpu post error!\n"); 2525 goto failed; 2526 } 2527 } 2528 2529 if (adev->is_atom_fw) { 2530 /* Initialize clocks */ 2531 r = amdgpu_atomfirmware_get_clock_info(adev); 2532 if (r) { 2533 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2534 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2535 goto failed; 2536 } 2537 } else { 2538 /* Initialize clocks */ 2539 r = amdgpu_atombios_get_clock_info(adev); 2540 if (r) { 2541 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2542 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2543 goto failed; 2544 } 2545 /* init i2c buses */ 2546 if (!amdgpu_device_has_dc_support(adev)) 2547 amdgpu_atombios_i2c_init(adev); 2548 } 2549 2550 fence_driver_init: 2551 /* Fence driver */ 2552 r = 
amdgpu_fence_driver_init(adev); 2553 if (r) { 2554 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2555 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2556 goto failed; 2557 } 2558 2559 /* init the mode config */ 2560 drm_mode_config_init(adev->ddev); 2561 2562 r = amdgpu_device_ip_init(adev); 2563 if (r) { 2564 /* failed in exclusive mode due to timeout */ 2565 if (amdgpu_sriov_vf(adev) && 2566 !amdgpu_sriov_runtime(adev) && 2567 amdgpu_virt_mmio_blocked(adev) && 2568 !amdgpu_virt_wait_reset(adev)) { 2569 dev_err(adev->dev, "VF exclusive mode timeout\n"); 2570 /* Don't send request since VF is inactive. */ 2571 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 2572 adev->virt.ops = NULL; 2573 r = -EAGAIN; 2574 goto failed; 2575 } 2576 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2577 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2578 goto failed; 2579 } 2580 2581 adev->accel_working = true; 2582 2583 amdgpu_vm_check_compute_bug(adev); 2584 2585 /* Initialize the buffer migration limit. */ 2586 if (amdgpu_moverate >= 0) 2587 max_MBps = amdgpu_moverate; 2588 else 2589 max_MBps = 8; /* Allow 8 MB/s. */ 2590 /* Get a log2 for easy divisions. */ 2591 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 2592 2593 r = amdgpu_ib_pool_init(adev); 2594 if (r) { 2595 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2596 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2597 goto failed; 2598 } 2599 2600 if (amdgpu_sriov_vf(adev)) 2601 amdgpu_virt_init_data_exchange(adev); 2602 2603 amdgpu_fbdev_init(adev); 2604 2605 r = amdgpu_pm_sysfs_init(adev); 2606 if (r) 2607 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2608 2609 r = amdgpu_debugfs_gem_init(adev); 2610 if (r) 2611 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2612 2613 r = amdgpu_debugfs_regs_init(adev); 2614 if (r) 2615 DRM_ERROR("registering register debugfs failed (%d).\n", r); 2616 2617 r = amdgpu_debugfs_firmware_init(adev); 2618 if (r) 2619 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2620 2621 r = amdgpu_debugfs_init(adev); 2622 if (r) 2623 DRM_ERROR("Creating debugfs files failed (%d).\n", r); 2624 2625 if ((amdgpu_testing & 1)) { 2626 if (adev->accel_working) 2627 amdgpu_test_moves(adev); 2628 else 2629 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 2630 } 2631 if (amdgpu_benchmarking) { 2632 if (adev->accel_working) 2633 amdgpu_benchmark(adev, amdgpu_benchmarking); 2634 else 2635 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 2636 } 2637 2638 /* enable clockgating, etc. after ib tests, etc. since some blocks require 2639 * explicit gating rather than handling it automatically. 2640 */ 2641 r = amdgpu_device_ip_late_init(adev); 2642 if (r) { 2643 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 2644 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2645 goto failed; 2646 } 2647 2648 return 0; 2649 2650 failed: 2651 amdgpu_vf_error_trans_all(adev); 2652 if (runtime) 2653 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2654 2655 return r; 2656 } 2657 2658 /** 2659 * amdgpu_device_fini - tear down the driver 2660 * 2661 * @adev: amdgpu_device pointer 2662 * 2663 * Tear down the driver info (all asics). 2664 * Called at driver shutdown. 
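 *
 * Roughly (see the code below): disable interrupts, shut down the displays,
 * tear down the IB pool, fence driver, pm sysfs and fbdev, run
 * amdgpu_device_ip_fini(), then release the firmware, i2c buses, vBIOS and
 * the MMIO/doorbell mappings.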
2665 */ 2666 void amdgpu_device_fini(struct amdgpu_device *adev) 2667 { 2668 int r; 2669 2670 DRM_INFO("amdgpu: finishing device.\n"); 2671 adev->shutdown = true; 2672 /* disable all interrupts */ 2673 amdgpu_irq_disable_all(adev); 2674 if (adev->mode_info.mode_config_initialized){ 2675 if (!amdgpu_device_has_dc_support(adev)) 2676 drm_crtc_force_disable_all(adev->ddev); 2677 else 2678 drm_atomic_helper_shutdown(adev->ddev); 2679 } 2680 amdgpu_ib_pool_fini(adev); 2681 amdgpu_fence_driver_fini(adev); 2682 amdgpu_pm_sysfs_fini(adev); 2683 amdgpu_fbdev_fini(adev); 2684 r = amdgpu_device_ip_fini(adev); 2685 if (adev->firmware.gpu_info_fw) { 2686 release_firmware(adev->firmware.gpu_info_fw); 2687 adev->firmware.gpu_info_fw = NULL; 2688 } 2689 adev->accel_working = false; 2690 cancel_delayed_work_sync(&adev->late_init_work); 2691 /* free i2c buses */ 2692 if (!amdgpu_device_has_dc_support(adev)) 2693 amdgpu_i2c_fini(adev); 2694 2695 if (amdgpu_emu_mode != 1) 2696 amdgpu_atombios_fini(adev); 2697 2698 kfree(adev->bios); 2699 adev->bios = NULL; 2700 if (!pci_is_thunderbolt_attached(adev->pdev)) 2701 vga_switcheroo_unregister_client(adev->pdev); 2702 if (adev->flags & AMD_IS_PX) 2703 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2704 vga_client_register(adev->pdev, NULL, NULL, NULL); 2705 if (adev->rio_mem) 2706 pci_iounmap(adev->pdev, adev->rio_mem); 2707 adev->rio_mem = NULL; 2708 iounmap(adev->rmmio); 2709 adev->rmmio = NULL; 2710 amdgpu_device_doorbell_fini(adev); 2711 amdgpu_debugfs_regs_cleanup(adev); 2712 } 2713 2714 2715 /* 2716 * Suspend & resume. 2717 */ 2718 /** 2719 * amdgpu_device_suspend - initiate device suspend 2720 * 2721 * @dev: drm dev pointer 2722 * @suspend: suspend state 2723 * @fbcon : notify the fbdev of suspend 2724 * 2725 * Puts the hw in the suspend state (all asics). 2726 * Returns 0 for success or an error on failure. 2727 * Called at driver suspend. 
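 *
 * Roughly (see the code below): disable output polling and fbcon, turn off
 * the displays and unpin framebuffers/cursors (non-DC), suspend KFD, run the
 * phase1 IP suspend, evict VRAM, suspend the fence driver, run the phase2 IP
 * suspend, evict VRAM again (for the GART page table), then put the PCI
 * device into D3hot when @suspend is set.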
2728 */ 2729 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) 2730 { 2731 struct amdgpu_device *adev; 2732 struct drm_crtc *crtc; 2733 struct drm_connector *connector; 2734 int r; 2735 2736 if (dev == NULL || dev->dev_private == NULL) { 2737 return -ENODEV; 2738 } 2739 2740 adev = dev->dev_private; 2741 2742 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2743 return 0; 2744 2745 adev->in_suspend = true; 2746 drm_kms_helper_poll_disable(dev); 2747 2748 if (fbcon) 2749 amdgpu_fbdev_set_suspend(adev, 1); 2750 2751 cancel_delayed_work_sync(&adev->late_init_work); 2752 2753 if (!amdgpu_device_has_dc_support(adev)) { 2754 /* turn off display hw */ 2755 drm_modeset_lock_all(dev); 2756 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2757 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 2758 } 2759 drm_modeset_unlock_all(dev); 2760 /* unpin the front buffers and cursors */ 2761 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2762 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2763 struct drm_framebuffer *fb = crtc->primary->fb; 2764 struct amdgpu_bo *robj; 2765 2766 if (amdgpu_crtc->cursor_bo) { 2767 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2768 r = amdgpu_bo_reserve(aobj, true); 2769 if (r == 0) { 2770 amdgpu_bo_unpin(aobj); 2771 amdgpu_bo_unreserve(aobj); 2772 } 2773 } 2774 2775 if (fb == NULL || fb->obj[0] == NULL) { 2776 continue; 2777 } 2778 robj = gem_to_amdgpu_bo(fb->obj[0]); 2779 /* don't unpin kernel fb objects */ 2780 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2781 r = amdgpu_bo_reserve(robj, true); 2782 if (r == 0) { 2783 amdgpu_bo_unpin(robj); 2784 amdgpu_bo_unreserve(robj); 2785 } 2786 } 2787 } 2788 } 2789 2790 amdgpu_amdkfd_suspend(adev); 2791 2792 r = amdgpu_device_ip_suspend_phase1(adev); 2793 2794 /* evict vram memory */ 2795 amdgpu_bo_evict_vram(adev); 2796 2797 amdgpu_fence_driver_suspend(adev); 2798 2799 r = amdgpu_device_ip_suspend_phase2(adev); 2800 2801 /* evict remaining vram memory 2802 * This second call to evict vram is to evict the gart page table 2803 * using the CPU. 2804 */ 2805 amdgpu_bo_evict_vram(adev); 2806 2807 pci_save_state(dev->pdev); 2808 if (suspend) { 2809 /* Shut down the device */ 2810 pci_disable_device(dev->pdev); 2811 pci_set_power_state(dev->pdev, PCI_D3hot); 2812 } else { 2813 r = amdgpu_asic_reset(adev); 2814 if (r) 2815 DRM_ERROR("amdgpu asic reset failed\n"); 2816 } 2817 2818 return 0; 2819 } 2820 2821 /** 2822 * amdgpu_device_resume - initiate device resume 2823 * 2824 * @dev: drm dev pointer 2825 * @resume: resume state 2826 * @fbcon : notify the fbdev of resume 2827 * 2828 * Bring the hw back to operating state (all asics). 2829 * Returns 0 for success or an error on failure. 2830 * Called at driver resume. 
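 *
 * Roughly the inverse of amdgpu_device_suspend() (see the code below):
 * re-enable the PCI device when @resume is set, post the card if needed, run
 * amdgpu_device_ip_resume() and resume the fence driver, run late init,
 * re-pin the cursors (non-DC), resume KFD, restore the mode and turn the
 * displays back on when @fbcon is set, then re-enable output polling.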
2831 */ 2832 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) 2833 { 2834 struct drm_connector *connector; 2835 struct amdgpu_device *adev = dev->dev_private; 2836 struct drm_crtc *crtc; 2837 int r = 0; 2838 2839 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2840 return 0; 2841 2842 if (resume) { 2843 pci_set_power_state(dev->pdev, PCI_D0); 2844 pci_restore_state(dev->pdev); 2845 r = pci_enable_device(dev->pdev); 2846 if (r) 2847 return r; 2848 } 2849 2850 /* post card */ 2851 if (amdgpu_device_need_post(adev)) { 2852 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2853 if (r) 2854 DRM_ERROR("amdgpu asic init failed\n"); 2855 } 2856 2857 r = amdgpu_device_ip_resume(adev); 2858 if (r) { 2859 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 2860 return r; 2861 } 2862 amdgpu_fence_driver_resume(adev); 2863 2864 2865 r = amdgpu_device_ip_late_init(adev); 2866 if (r) 2867 return r; 2868 2869 if (!amdgpu_device_has_dc_support(adev)) { 2870 /* pin cursors */ 2871 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2872 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2873 2874 if (amdgpu_crtc->cursor_bo) { 2875 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2876 r = amdgpu_bo_reserve(aobj, true); 2877 if (r == 0) { 2878 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2879 if (r != 0) 2880 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 2881 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 2882 amdgpu_bo_unreserve(aobj); 2883 } 2884 } 2885 } 2886 } 2887 r = amdgpu_amdkfd_resume(adev); 2888 if (r) 2889 return r; 2890 2891 /* Make sure IB tests flushed */ 2892 flush_delayed_work(&adev->late_init_work); 2893 2894 /* blat the mode back in */ 2895 if (fbcon) { 2896 if (!amdgpu_device_has_dc_support(adev)) { 2897 /* pre DCE11 */ 2898 drm_helper_resume_force_mode(dev); 2899 2900 /* turn on display hw */ 2901 drm_modeset_lock_all(dev); 2902 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2903 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 2904 } 2905 drm_modeset_unlock_all(dev); 2906 } 2907 amdgpu_fbdev_set_suspend(adev, 0); 2908 } 2909 2910 drm_kms_helper_poll_enable(dev); 2911 2912 /* 2913 * Most of the connector probing functions try to acquire runtime pm 2914 * refs to ensure that the GPU is powered on when connector polling is 2915 * performed. Since we're calling this from a runtime PM callback, 2916 * trying to acquire rpm refs will cause us to deadlock. 2917 * 2918 * Since we're guaranteed to be holding the rpm lock, it's safe to 2919 * temporarily disable the rpm helpers so this doesn't deadlock us. 2920 */ 2921 #ifdef CONFIG_PM 2922 dev->dev->power.disable_depth++; 2923 #endif 2924 if (!amdgpu_device_has_dc_support(adev)) 2925 drm_helper_hpd_irq_event(dev); 2926 else 2927 drm_kms_helper_hotplug_event(dev); 2928 #ifdef CONFIG_PM 2929 dev->dev->power.disable_depth--; 2930 #endif 2931 adev->in_suspend = false; 2932 2933 return 0; 2934 } 2935 2936 /** 2937 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 2938 * 2939 * @adev: amdgpu_device pointer 2940 * 2941 * The list of all the hardware IPs that make up the asic is walked and 2942 * the check_soft_reset callbacks are run. check_soft_reset determines 2943 * if the asic is still hung or not. 2944 * Returns true if any of the IPs are still in a hung state, false if not. 
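 *
 * For SR-IOV VFs, or when amdgpu_asic_need_full_reset() reports that a full
 * reset is required, this always returns true so that the full reset path
 * is taken.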
2945 */ 2946 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 2947 { 2948 int i; 2949 bool asic_hang = false; 2950 2951 if (amdgpu_sriov_vf(adev)) 2952 return true; 2953 2954 if (amdgpu_asic_need_full_reset(adev)) 2955 return true; 2956 2957 for (i = 0; i < adev->num_ip_blocks; i++) { 2958 if (!adev->ip_blocks[i].status.valid) 2959 continue; 2960 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 2961 adev->ip_blocks[i].status.hang = 2962 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 2963 if (adev->ip_blocks[i].status.hang) { 2964 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 2965 asic_hang = true; 2966 } 2967 } 2968 return asic_hang; 2969 } 2970 2971 /** 2972 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 2973 * 2974 * @adev: amdgpu_device pointer 2975 * 2976 * The list of all the hardware IPs that make up the asic is walked and the 2977 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 2978 * handles any IP specific hardware or software state changes that are 2979 * necessary for a soft reset to succeed. 2980 * Returns 0 on success, negative error code on failure. 2981 */ 2982 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 2983 { 2984 int i, r = 0; 2985 2986 for (i = 0; i < adev->num_ip_blocks; i++) { 2987 if (!adev->ip_blocks[i].status.valid) 2988 continue; 2989 if (adev->ip_blocks[i].status.hang && 2990 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 2991 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 2992 if (r) 2993 return r; 2994 } 2995 } 2996 2997 return 0; 2998 } 2999 3000 /** 3001 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3002 * 3003 * @adev: amdgpu_device pointer 3004 * 3005 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3006 * reset is necessary to recover. 3007 * Returns true if a full asic reset is required, false if not. 3008 */ 3009 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3010 { 3011 int i; 3012 3013 if (amdgpu_asic_need_full_reset(adev)) 3014 return true; 3015 3016 for (i = 0; i < adev->num_ip_blocks; i++) { 3017 if (!adev->ip_blocks[i].status.valid) 3018 continue; 3019 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3020 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3021 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3022 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3023 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3024 if (adev->ip_blocks[i].status.hang) { 3025 DRM_INFO("Some block need full reset!\n"); 3026 return true; 3027 } 3028 } 3029 } 3030 return false; 3031 } 3032 3033 /** 3034 * amdgpu_device_ip_soft_reset - do a soft reset 3035 * 3036 * @adev: amdgpu_device pointer 3037 * 3038 * The list of all the hardware IPs that make up the asic is walked and the 3039 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3040 * IP specific hardware or software state changes that are necessary to soft 3041 * reset the IP. 3042 * Returns 0 on success, negative error code on failure. 
3043 */ 3044 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3045 { 3046 int i, r = 0; 3047 3048 for (i = 0; i < adev->num_ip_blocks; i++) { 3049 if (!adev->ip_blocks[i].status.valid) 3050 continue; 3051 if (adev->ip_blocks[i].status.hang && 3052 adev->ip_blocks[i].version->funcs->soft_reset) { 3053 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3054 if (r) 3055 return r; 3056 } 3057 } 3058 3059 return 0; 3060 } 3061 3062 /** 3063 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3064 * 3065 * @adev: amdgpu_device pointer 3066 * 3067 * The list of all the hardware IPs that make up the asic is walked and the 3068 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3069 * handles any IP specific hardware or software state changes that are 3070 * necessary after the IP has been soft reset. 3071 * Returns 0 on success, negative error code on failure. 3072 */ 3073 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3074 { 3075 int i, r = 0; 3076 3077 for (i = 0; i < adev->num_ip_blocks; i++) { 3078 if (!adev->ip_blocks[i].status.valid) 3079 continue; 3080 if (adev->ip_blocks[i].status.hang && 3081 adev->ip_blocks[i].version->funcs->post_soft_reset) 3082 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3083 if (r) 3084 return r; 3085 } 3086 3087 return 0; 3088 } 3089 3090 /** 3091 * amdgpu_device_recover_vram - Recover some VRAM contents 3092 * 3093 * @adev: amdgpu_device pointer 3094 * 3095 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3096 * restore things like GPUVM page tables after a GPU reset where 3097 * the contents of VRAM might be lost. 3098 * 3099 * Returns: 3100 * 0 on success, negative error code on failure. 3101 */ 3102 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3103 { 3104 struct dma_fence *fence = NULL, *next = NULL; 3105 struct amdgpu_bo *shadow; 3106 long r = 1, tmo; 3107 3108 if (amdgpu_sriov_runtime(adev)) 3109 tmo = msecs_to_jiffies(8000); 3110 else 3111 tmo = msecs_to_jiffies(100); 3112 3113 DRM_INFO("recover vram bo from shadow start\n"); 3114 mutex_lock(&adev->shadow_list_lock); 3115 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3116 3117 /* No need to recover an evicted BO */ 3118 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3119 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3120 continue; 3121 3122 r = amdgpu_bo_restore_shadow(shadow, &next); 3123 if (r) 3124 break; 3125 3126 if (fence) { 3127 r = dma_fence_wait_timeout(fence, false, tmo); 3128 dma_fence_put(fence); 3129 fence = next; 3130 if (r <= 0) 3131 break; 3132 } else { 3133 fence = next; 3134 } 3135 } 3136 mutex_unlock(&adev->shadow_list_lock); 3137 3138 if (fence) 3139 tmo = dma_fence_wait_timeout(fence, false, tmo); 3140 dma_fence_put(fence); 3141 3142 if (r <= 0 || tmo <= 0) { 3143 DRM_ERROR("recover vram bo from shadow failed\n"); 3144 return -EIO; 3145 } 3146 3147 DRM_INFO("recover vram bo from shadow done\n"); 3148 return 0; 3149 } 3150 3151 /** 3152 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough 3153 * 3154 * @adev: amdgpu device pointer 3155 * 3156 * attempt to do soft-reset or full-reset and reinitialize Asic 3157 * return 0 means succeeded otherwise failed 3158 */ 3159 static int amdgpu_device_reset(struct amdgpu_device *adev) 3160 { 3161 bool need_full_reset, vram_lost = 0; 3162 int r; 3163 3164 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3165 3166 if (!need_full_reset) { 3167 
amdgpu_device_ip_pre_soft_reset(adev); 3168 r = amdgpu_device_ip_soft_reset(adev); 3169 amdgpu_device_ip_post_soft_reset(adev); 3170 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3171 DRM_INFO("soft reset failed, will fallback to full reset!\n"); 3172 need_full_reset = true; 3173 } 3174 } 3175 3176 if (need_full_reset) { 3177 r = amdgpu_device_ip_suspend(adev); 3178 3179 retry: 3180 r = amdgpu_asic_reset(adev); 3181 /* post card */ 3182 amdgpu_atom_asic_init(adev->mode_info.atom_context); 3183 3184 if (!r) { 3185 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); 3186 r = amdgpu_device_ip_resume_phase1(adev); 3187 if (r) 3188 goto out; 3189 3190 vram_lost = amdgpu_device_check_vram_lost(adev); 3191 if (vram_lost) { 3192 DRM_ERROR("VRAM is lost!\n"); 3193 atomic_inc(&adev->vram_lost_counter); 3194 } 3195 3196 r = amdgpu_gtt_mgr_recover( 3197 &adev->mman.bdev.man[TTM_PL_TT]); 3198 if (r) 3199 goto out; 3200 3201 r = amdgpu_device_fw_loading(adev); 3202 if (r) 3203 return r; 3204 3205 r = amdgpu_device_ip_resume_phase2(adev); 3206 if (r) 3207 goto out; 3208 3209 if (vram_lost) 3210 amdgpu_device_fill_reset_magic(adev); 3211 } 3212 } 3213 3214 out: 3215 if (!r) { 3216 amdgpu_irq_gpu_reset_resume_helper(adev); 3217 r = amdgpu_ib_ring_tests(adev); 3218 if (r) { 3219 dev_err(adev->dev, "ib ring test failed (%d).\n", r); 3220 r = amdgpu_device_ip_suspend(adev); 3221 need_full_reset = true; 3222 goto retry; 3223 } 3224 } 3225 3226 if (!r) 3227 r = amdgpu_device_recover_vram(adev); 3228 3229 return r; 3230 } 3231 3232 /** 3233 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3234 * 3235 * @adev: amdgpu device pointer 3236 * @from_hypervisor: request from hypervisor 3237 * 3238 * do VF FLR and reinitialize Asic 3239 * return 0 means succeeded otherwise failed 3240 */ 3241 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 3242 bool from_hypervisor) 3243 { 3244 int r; 3245 3246 if (from_hypervisor) 3247 r = amdgpu_virt_request_full_gpu(adev, true); 3248 else 3249 r = amdgpu_virt_reset_gpu(adev); 3250 if (r) 3251 return r; 3252 3253 /* Resume IP prior to SMC */ 3254 r = amdgpu_device_ip_reinit_early_sriov(adev); 3255 if (r) 3256 goto error; 3257 3258 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3259 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3260 3261 r = amdgpu_device_fw_loading(adev); 3262 if (r) 3263 return r; 3264 3265 /* now we are okay to resume SMC/CP/SDMA */ 3266 r = amdgpu_device_ip_reinit_late_sriov(adev); 3267 if (r) 3268 goto error; 3269 3270 amdgpu_irq_gpu_reset_resume_helper(adev); 3271 r = amdgpu_ib_ring_tests(adev); 3272 3273 error: 3274 amdgpu_virt_release_full_gpu(adev, true); 3275 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3276 atomic_inc(&adev->vram_lost_counter); 3277 r = amdgpu_device_recover_vram(adev); 3278 } 3279 3280 return r; 3281 } 3282 3283 /** 3284 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3285 * 3286 * @adev: amdgpu device pointer 3287 * 3288 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3289 * a hung GPU. 
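 *
 * With amdgpu_gpu_recovery == 0 recovery is never attempted; with
 * amdgpu_gpu_recovery == -1 (auto) it is only attempted for SR-IOV VFs.
 * A soft reset check is done first so that a timeout without a real
 * hardware hang does not trigger a reset.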
3290 */ 3291 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 3292 { 3293 if (!amdgpu_device_ip_check_soft_reset(adev)) { 3294 DRM_INFO("Timeout, but no hardware hang detected.\n"); 3295 return false; 3296 } 3297 3298 if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && 3299 !amdgpu_sriov_vf(adev))) { 3300 DRM_INFO("GPU recovery disabled.\n"); 3301 return false; 3302 } 3303 3304 return true; 3305 } 3306 3307 /** 3308 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 3309 * 3310 * @adev: amdgpu device pointer 3311 * @job: which job triggered the hang 3312 * 3313 * Attempt to reset the GPU if it has hung (all asics). 3314 * Returns 0 for success or an error on failure. 3315 */ 3316 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 3317 struct amdgpu_job *job) 3318 { 3319 int i, r, resched; 3320 3321 dev_info(adev->dev, "GPU reset begin!\n"); 3322 3323 mutex_lock(&adev->lock_reset); 3324 atomic_inc(&adev->gpu_reset_counter); 3325 adev->in_gpu_reset = 1; 3326 3327 /* Block kfd */ 3328 amdgpu_amdkfd_pre_reset(adev); 3329 3330 /* block TTM */ 3331 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 3332 3333 /* block all schedulers and reset given job's ring */ 3334 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3335 struct amdgpu_ring *ring = adev->rings[i]; 3336 3337 if (!ring || !ring->sched.thread) 3338 continue; 3339 3340 kthread_park(ring->sched.thread); 3341 3342 if (job && job->base.sched != &ring->sched) 3343 continue; 3344 3345 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); 3346 3347 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3348 amdgpu_fence_driver_force_completion(ring); 3349 } 3350 3351 if (amdgpu_sriov_vf(adev)) 3352 r = amdgpu_device_reset_sriov(adev, job ? false : true); 3353 else 3354 r = amdgpu_device_reset(adev); 3355 3356 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3357 struct amdgpu_ring *ring = adev->rings[i]; 3358 3359 if (!ring || !ring->sched.thread) 3360 continue; 3361 3362 /* only need to recover the scheduler of the given job's ring 3363 * or of all rings (in the case @job is NULL) 3364 * after the reset above has completed 3365 */ 3366 if ((!job || job->base.sched == &ring->sched) && !r) 3367 drm_sched_job_recovery(&ring->sched); 3368 3369 kthread_unpark(ring->sched.thread); 3370 } 3371 3372 if (!amdgpu_device_has_dc_support(adev)) { 3373 drm_helper_resume_force_mode(adev->ddev); 3374 } 3375 3376 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); 3377 3378 if (r) { 3379 /* bad news, how to tell it to userspace ? */ 3380 dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); 3381 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 3382 } else { 3383 dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); 3384 } 3385 3386 /* unlock kfd */ 3387 amdgpu_amdkfd_post_reset(adev); 3388 amdgpu_vf_error_trans_all(adev); 3389 adev->in_gpu_reset = 0; 3390 mutex_unlock(&adev->lock_reset); 3391 return r; 3392 } 3393 3394 /** 3395 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot 3396 * 3397 * @adev: amdgpu_device pointer 3398 * 3399 * Fetches and stores in the driver the PCIE capabilities (gen speed 3400 * and lanes) of the slot the device is in. Handles APUs and 3401 * virtualized environments where PCIE config space may not be available.
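 *
 * The amdgpu_pcie_gen_cap and amdgpu_pcie_lane_cap module parameters
 * override detection, and for devices on a root bus (e.g. APUs) the default
 * gen and link-width masks are used instead of querying the upstream bridge.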
3402 */ 3403 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3404 { 3405 struct pci_dev *pdev; 3406 enum pci_bus_speed speed_cap; 3407 enum pcie_link_width link_width; 3408 3409 if (amdgpu_pcie_gen_cap) 3410 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 3411 3412 if (amdgpu_pcie_lane_cap) 3413 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 3414 3415 /* covers APUs as well */ 3416 if (pci_is_root_bus(adev->pdev->bus)) { 3417 if (adev->pm.pcie_gen_mask == 0) 3418 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 3419 if (adev->pm.pcie_mlw_mask == 0) 3420 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 3421 return; 3422 } 3423 3424 if (adev->pm.pcie_gen_mask == 0) { 3425 /* asic caps */ 3426 pdev = adev->pdev; 3427 speed_cap = pcie_get_speed_cap(pdev); 3428 if (speed_cap == PCI_SPEED_UNKNOWN) { 3429 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3430 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3431 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3432 } else { 3433 if (speed_cap == PCIE_SPEED_16_0GT) 3434 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3435 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3436 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3437 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 3438 else if (speed_cap == PCIE_SPEED_8_0GT) 3439 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3440 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3441 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3442 else if (speed_cap == PCIE_SPEED_5_0GT) 3443 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3444 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 3445 else 3446 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 3447 } 3448 /* platform caps */ 3449 pdev = adev->ddev->pdev->bus->self; 3450 speed_cap = pcie_get_speed_cap(pdev); 3451 if (speed_cap == PCI_SPEED_UNKNOWN) { 3452 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3453 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3454 } else { 3455 if (speed_cap == PCIE_SPEED_16_0GT) 3456 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3457 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3458 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3459 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 3460 else if (speed_cap == PCIE_SPEED_8_0GT) 3461 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3462 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3463 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 3464 else if (speed_cap == PCIE_SPEED_5_0GT) 3465 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3466 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3467 else 3468 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 3469 3470 } 3471 } 3472 if (adev->pm.pcie_mlw_mask == 0) { 3473 pdev = adev->ddev->pdev->bus->self; 3474 link_width = pcie_get_width_cap(pdev); 3475 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { 3476 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 3477 } else { 3478 switch (link_width) { 3479 case PCIE_LNK_X32: 3480 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 3481 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3482 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3483 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3484 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3485 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3486 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3487 break; 3488 case PCIE_LNK_X16: 3489 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3490 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3491 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3492 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3493 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3494 
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3495 break; 3496 case PCIE_LNK_X12: 3497 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3498 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3499 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3501 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3502 break; 3503 case PCIE_LNK_X8: 3504 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3508 break; 3509 case PCIE_LNK_X4: 3510 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3511 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3513 break; 3514 case PCIE_LNK_X2: 3515 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3517 break; 3518 case PCIE_LNK_X1: 3519 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 3520 break; 3521 default: 3522 break; 3523 } 3524 } 3525 } 3526 } 3527 3528