/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
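 *
 * Note (summarizing the helpers that follow): registers that fall inside the
 * mapped MMIO window (reg * 4 < adev->rmmio_size) are accessed directly with
 * readl()/writel(), while registers beyond the window are accessed
 * indirectly through the mmMM_INDEX/mmMM_DATA pair under mmio_idx_lock.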
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
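 *
 * Most code goes through the RREG32()/WREG32() macro wrappers rather than
 * calling this helper directly; e.g. a read-modify-write is typically
 * written as WREG32(reg, (RREG32(reg) & ~mask) | value).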
201 */ 202 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 203 uint32_t acc_flags) 204 { 205 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); 206 207 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { 208 adev->last_mm_index = v; 209 } 210 211 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) 212 return amdgpu_virt_kiq_wreg(adev, reg, v); 213 214 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) 215 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 216 else { 217 unsigned long flags; 218 219 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 220 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); 221 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); 222 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 223 } 224 225 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { 226 udelay(500); 227 } 228 } 229 230 /** 231 * amdgpu_io_rreg - read an IO register 232 * 233 * @adev: amdgpu_device pointer 234 * @reg: dword aligned register offset 235 * 236 * Returns the 32 bit value from the offset specified. 237 */ 238 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) 239 { 240 if ((reg * 4) < adev->rio_mem_size) 241 return ioread32(adev->rio_mem + (reg * 4)); 242 else { 243 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4)); 244 return ioread32(adev->rio_mem + (mmMM_DATA * 4)); 245 } 246 } 247 248 /** 249 * amdgpu_io_wreg - write to an IO register 250 * 251 * @adev: amdgpu_device pointer 252 * @reg: dword aligned register offset 253 * @v: 32 bit value to write to the register 254 * 255 * Writes the value specified to the offset specified. 256 */ 257 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) 258 { 259 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { 260 adev->last_mm_index = v; 261 } 262 263 if ((reg * 4) < adev->rio_mem_size) 264 iowrite32(v, adev->rio_mem + (reg * 4)); 265 else { 266 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4)); 267 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4)); 268 } 269 270 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { 271 udelay(500); 272 } 273 } 274 275 /** 276 * amdgpu_mm_rdoorbell - read a doorbell dword 277 * 278 * @adev: amdgpu_device pointer 279 * @index: doorbell index 280 * 281 * Returns the value in the doorbell aperture at the 282 * requested doorbell index (CIK). 283 */ 284 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) 285 { 286 if (index < adev->doorbell.num_doorbells) { 287 return readl(adev->doorbell.ptr + index); 288 } else { 289 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 290 return 0; 291 } 292 } 293 294 /** 295 * amdgpu_mm_wdoorbell - write a doorbell dword 296 * 297 * @adev: amdgpu_device pointer 298 * @index: doorbell index 299 * @v: value to write 300 * 301 * Writes @v to the doorbell aperture at the 302 * requested doorbell index (CIK). 303 */ 304 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) 305 { 306 if (index < adev->doorbell.num_doorbells) { 307 writel(v, adev->doorbell.ptr + index); 308 } else { 309 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 310 } 311 } 312 313 /** 314 * amdgpu_mm_rdoorbell64 - read a doorbell Qword 315 * 316 * @adev: amdgpu_device pointer 317 * @index: doorbell index 318 * 319 * Returns the value in the doorbell aperture at the 320 * requested doorbell index (VEGA10+). 
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
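 * Returns 0 on success, negative error code on failure.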
432 */ 433 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) 434 { 435 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, 436 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 437 &adev->vram_scratch.robj, 438 &adev->vram_scratch.gpu_addr, 439 (void **)&adev->vram_scratch.ptr); 440 } 441 442 /** 443 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page 444 * 445 * @adev: amdgpu device pointer 446 * 447 * Frees the VRAM scratch page. 448 */ 449 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev) 450 { 451 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); 452 } 453 454 /** 455 * amdgpu_device_program_register_sequence - program an array of registers. 456 * 457 * @adev: amdgpu_device pointer 458 * @registers: pointer to the register array 459 * @array_size: size of the register array 460 * 461 * Programs an array or registers with and and or masks. 462 * This is a helper for setting golden registers. 463 */ 464 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, 465 const u32 *registers, 466 const u32 array_size) 467 { 468 u32 tmp, reg, and_mask, or_mask; 469 int i; 470 471 if (array_size % 3) 472 return; 473 474 for (i = 0; i < array_size; i +=3) { 475 reg = registers[i + 0]; 476 and_mask = registers[i + 1]; 477 or_mask = registers[i + 2]; 478 479 if (and_mask == 0xffffffff) { 480 tmp = or_mask; 481 } else { 482 tmp = RREG32(reg); 483 tmp &= ~and_mask; 484 tmp |= or_mask; 485 } 486 WREG32(reg, tmp); 487 } 488 } 489 490 /** 491 * amdgpu_device_pci_config_reset - reset the GPU 492 * 493 * @adev: amdgpu_device pointer 494 * 495 * Resets the GPU using the pci config reset sequence. 496 * Only applicable to asics prior to vega10. 497 */ 498 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) 499 { 500 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); 501 } 502 503 /* 504 * GPU doorbell aperture helpers function. 505 */ 506 /** 507 * amdgpu_device_doorbell_init - Init doorbell driver information. 508 * 509 * @adev: amdgpu_device pointer 510 * 511 * Init doorbell driver information (CIK) 512 * Returns 0 on success, error on failure. 513 */ 514 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) 515 { 516 /* No doorbell on SI hardware generation */ 517 if (adev->asic_type < CHIP_BONAIRE) { 518 adev->doorbell.base = 0; 519 adev->doorbell.size = 0; 520 adev->doorbell.num_doorbells = 0; 521 adev->doorbell.ptr = NULL; 522 return 0; 523 } 524 525 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) 526 return -EINVAL; 527 528 /* doorbell bar mapping */ 529 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 530 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 531 532 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), 533 AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); 534 if (adev->doorbell.num_doorbells == 0) 535 return -EINVAL; 536 537 adev->doorbell.ptr = ioremap(adev->doorbell.base, 538 adev->doorbell.num_doorbells * 539 sizeof(u32)); 540 if (adev->doorbell.ptr == NULL) 541 return -ENOMEM; 542 543 return 0; 544 } 545 546 /** 547 * amdgpu_device_doorbell_fini - Tear down doorbell driver information. 
548 * 549 * @adev: amdgpu_device pointer 550 * 551 * Tear down doorbell driver information (CIK) 552 */ 553 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) 554 { 555 iounmap(adev->doorbell.ptr); 556 adev->doorbell.ptr = NULL; 557 } 558 559 560 561 /* 562 * amdgpu_device_wb_*() 563 * Writeback is the method by which the GPU updates special pages in memory 564 * with the status of certain GPU events (fences, ring pointers,etc.). 565 */ 566 567 /** 568 * amdgpu_device_wb_fini - Disable Writeback and free memory 569 * 570 * @adev: amdgpu_device pointer 571 * 572 * Disables Writeback and frees the Writeback memory (all asics). 573 * Used at driver shutdown. 574 */ 575 static void amdgpu_device_wb_fini(struct amdgpu_device *adev) 576 { 577 if (adev->wb.wb_obj) { 578 amdgpu_bo_free_kernel(&adev->wb.wb_obj, 579 &adev->wb.gpu_addr, 580 (void **)&adev->wb.wb); 581 adev->wb.wb_obj = NULL; 582 } 583 } 584 585 /** 586 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory 587 * 588 * @adev: amdgpu_device pointer 589 * 590 * Initializes writeback and allocates writeback memory (all asics). 591 * Used at driver startup. 592 * Returns 0 on success or an -error on failure. 593 */ 594 static int amdgpu_device_wb_init(struct amdgpu_device *adev) 595 { 596 int r; 597 598 if (adev->wb.wb_obj == NULL) { 599 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ 600 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, 601 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 602 &adev->wb.wb_obj, &adev->wb.gpu_addr, 603 (void **)&adev->wb.wb); 604 if (r) { 605 dev_warn(adev->dev, "(%d) create WB bo failed\n", r); 606 return r; 607 } 608 609 adev->wb.num_wb = AMDGPU_MAX_WB; 610 memset(&adev->wb.used, 0, sizeof(adev->wb.used)); 611 612 /* clear wb memory */ 613 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); 614 } 615 616 return 0; 617 } 618 619 /** 620 * amdgpu_device_wb_get - Allocate a wb entry 621 * 622 * @adev: amdgpu_device pointer 623 * @wb: wb index 624 * 625 * Allocate a wb slot for use by the driver (all asics). 626 * Returns 0 on success or -EINVAL on failure. 627 */ 628 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) 629 { 630 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); 631 632 if (offset < adev->wb.num_wb) { 633 __set_bit(offset, adev->wb.used); 634 *wb = offset << 3; /* convert to dw offset */ 635 return 0; 636 } else { 637 return -EINVAL; 638 } 639 } 640 641 /** 642 * amdgpu_device_wb_free - Free a wb entry 643 * 644 * @adev: amdgpu_device pointer 645 * @wb: wb index 646 * 647 * Free a wb slot allocated for use by the driver (all asics) 648 */ 649 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) 650 { 651 wb >>= 3; 652 if (wb < adev->wb.num_wb) 653 __clear_bit(wb, adev->wb.used); 654 } 655 656 /** 657 * amdgpu_device_resize_fb_bar - try to resize FB BAR 658 * 659 * @adev: amdgpu_device pointer 660 * 661 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not 662 * to fail, but if any of the BARs is not accessible after the size we abort 663 * driver loading by returning -ENODEV. 
664 */ 665 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 666 { 667 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size); 668 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; 669 struct pci_bus *root; 670 struct resource *res; 671 unsigned i; 672 u16 cmd; 673 int r; 674 675 /* Bypass for VF */ 676 if (amdgpu_sriov_vf(adev)) 677 return 0; 678 679 /* Check if the root BUS has 64bit memory resources */ 680 root = adev->pdev->bus; 681 while (root->parent) 682 root = root->parent; 683 684 pci_bus_for_each_resource(root, res, i) { 685 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 686 res->start > 0x100000000ull) 687 break; 688 } 689 690 /* Trying to resize is pointless without a root hub window above 4GB */ 691 if (!res) 692 return 0; 693 694 /* Disable memory decoding while we change the BAR addresses and size */ 695 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); 696 pci_write_config_word(adev->pdev, PCI_COMMAND, 697 cmd & ~PCI_COMMAND_MEMORY); 698 699 /* Free the VRAM and doorbell BAR, we most likely need to move both. */ 700 amdgpu_device_doorbell_fini(adev); 701 if (adev->asic_type >= CHIP_BONAIRE) 702 pci_release_resource(adev->pdev, 2); 703 704 pci_release_resource(adev->pdev, 0); 705 706 r = pci_resize_resource(adev->pdev, 0, rbar_size); 707 if (r == -ENOSPC) 708 DRM_INFO("Not enough PCI address space for a large BAR."); 709 else if (r && r != -ENOTSUPP) 710 DRM_ERROR("Problem resizing BAR0 (%d).", r); 711 712 pci_assign_unassigned_bus_resources(adev->pdev->bus); 713 714 /* When the doorbell or fb BAR isn't available we have no chance of 715 * using the device. 716 */ 717 r = amdgpu_device_doorbell_init(adev); 718 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) 719 return -ENODEV; 720 721 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); 722 723 return 0; 724 } 725 726 /* 727 * GPU helpers function. 728 */ 729 /** 730 * amdgpu_device_need_post - check if the hw need post or not 731 * 732 * @adev: amdgpu_device pointer 733 * 734 * Check if the asic has been initialized (all asics) at driver startup 735 * or post is needed if hw reset is performed. 736 * Returns true if need or false if not. 
737 */ 738 bool amdgpu_device_need_post(struct amdgpu_device *adev) 739 { 740 uint32_t reg; 741 742 if (amdgpu_sriov_vf(adev)) 743 return false; 744 745 if (amdgpu_passthrough(adev)) { 746 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot 747 * some old smc fw still need driver do vPost otherwise gpu hang, while 748 * those smc fw version above 22.15 doesn't have this flaw, so we force 749 * vpost executed for smc version below 22.15 750 */ 751 if (adev->asic_type == CHIP_FIJI) { 752 int err; 753 uint32_t fw_ver; 754 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); 755 /* force vPost if error occured */ 756 if (err) 757 return true; 758 759 fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 760 if (fw_ver < 0x00160e00) 761 return true; 762 } 763 } 764 765 if (adev->has_hw_reset) { 766 adev->has_hw_reset = false; 767 return true; 768 } 769 770 /* bios scratch used on CIK+ */ 771 if (adev->asic_type >= CHIP_BONAIRE) 772 return amdgpu_atombios_scratch_need_asic_init(adev); 773 774 /* check MEM_SIZE for older asics */ 775 reg = amdgpu_asic_get_config_memsize(adev); 776 777 if ((reg != 0) && (reg != 0xffffffff)) 778 return false; 779 780 return true; 781 } 782 783 /* if we get transitioned to only one device, take VGA back */ 784 /** 785 * amdgpu_device_vga_set_decode - enable/disable vga decode 786 * 787 * @cookie: amdgpu_device pointer 788 * @state: enable/disable vga decode 789 * 790 * Enable/disable vga decode (all asics). 791 * Returns VGA resource flags. 792 */ 793 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) 794 { 795 struct amdgpu_device *adev = cookie; 796 amdgpu_asic_set_vga_state(adev, state); 797 if (state) 798 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | 799 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 800 else 801 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 802 } 803 804 /** 805 * amdgpu_device_check_block_size - validate the vm block size 806 * 807 * @adev: amdgpu_device pointer 808 * 809 * Validates the vm block size specified via module parameter. 810 * The vm block size defines number of bits in page table versus page directory, 811 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 812 * page table and the remaining bits are in the page directory. 813 */ 814 static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 815 { 816 /* defines number of bits in page table versus page directory, 817 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 818 * page table and the remaining bits are in the page directory */ 819 if (amdgpu_vm_block_size == -1) 820 return; 821 822 if (amdgpu_vm_block_size < 9) { 823 dev_warn(adev->dev, "VM page table size (%d) too small\n", 824 amdgpu_vm_block_size); 825 amdgpu_vm_block_size = -1; 826 } 827 } 828 829 /** 830 * amdgpu_device_check_vm_size - validate the vm size 831 * 832 * @adev: amdgpu_device pointer 833 * 834 * Validates the vm size in GB specified via module parameter. 835 * The VM size is the size of the GPU virtual memory space in GB. 836 */ 837 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 838 { 839 /* no need to check the default value */ 840 if (amdgpu_vm_size == -1) 841 return; 842 843 if (amdgpu_vm_size < 1) { 844 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", 845 amdgpu_vm_size); 846 amdgpu_vm_size = -1; 847 } 848 } 849 850 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) 851 { 852 struct sysinfo si; 853 bool is_os_64 = (sizeof(void *) == 8) ? 
								true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
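 * (This callback is wired up through the amdgpu_switcheroo_ops table below.)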
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
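 *
 * For example, amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX)
 * returns the GFX IP block for the asic, or NULL if none was registered.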
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
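 *
 * For example (illustrative), amdgpu.virtual_display=0000:03:00.0,2 enables
 * two virtual crtcs on the device at that PCI address, and "all" can be used
 * in place of a PCI address to match every device.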
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct
			gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
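 *
 * Note: individual IP blocks can be left disabled for debugging through the
 * amdgpu_ip_block_mask module parameter (a bitmask indexed by IP block
 * position), which is consulted below when marking each block valid.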
1411 */ 1412 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 1413 { 1414 int i, r; 1415 1416 amdgpu_device_enable_virtual_display(adev); 1417 1418 switch (adev->asic_type) { 1419 case CHIP_TOPAZ: 1420 case CHIP_TONGA: 1421 case CHIP_FIJI: 1422 case CHIP_POLARIS10: 1423 case CHIP_POLARIS11: 1424 case CHIP_POLARIS12: 1425 case CHIP_VEGAM: 1426 case CHIP_CARRIZO: 1427 case CHIP_STONEY: 1428 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) 1429 adev->family = AMDGPU_FAMILY_CZ; 1430 else 1431 adev->family = AMDGPU_FAMILY_VI; 1432 1433 r = vi_set_ip_blocks(adev); 1434 if (r) 1435 return r; 1436 break; 1437 #ifdef CONFIG_DRM_AMDGPU_SI 1438 case CHIP_VERDE: 1439 case CHIP_TAHITI: 1440 case CHIP_PITCAIRN: 1441 case CHIP_OLAND: 1442 case CHIP_HAINAN: 1443 adev->family = AMDGPU_FAMILY_SI; 1444 r = si_set_ip_blocks(adev); 1445 if (r) 1446 return r; 1447 break; 1448 #endif 1449 #ifdef CONFIG_DRM_AMDGPU_CIK 1450 case CHIP_BONAIRE: 1451 case CHIP_HAWAII: 1452 case CHIP_KAVERI: 1453 case CHIP_KABINI: 1454 case CHIP_MULLINS: 1455 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII)) 1456 adev->family = AMDGPU_FAMILY_CI; 1457 else 1458 adev->family = AMDGPU_FAMILY_KV; 1459 1460 r = cik_set_ip_blocks(adev); 1461 if (r) 1462 return r; 1463 break; 1464 #endif 1465 case CHIP_VEGA10: 1466 case CHIP_VEGA12: 1467 case CHIP_VEGA20: 1468 case CHIP_RAVEN: 1469 if (adev->asic_type == CHIP_RAVEN) 1470 adev->family = AMDGPU_FAMILY_RV; 1471 else 1472 adev->family = AMDGPU_FAMILY_AI; 1473 1474 r = soc15_set_ip_blocks(adev); 1475 if (r) 1476 return r; 1477 break; 1478 default: 1479 /* FIXME: not supported yet */ 1480 return -EINVAL; 1481 } 1482 1483 r = amdgpu_device_parse_gpu_info_fw(adev); 1484 if (r) 1485 return r; 1486 1487 amdgpu_amdkfd_device_probe(adev); 1488 1489 if (amdgpu_sriov_vf(adev)) { 1490 r = amdgpu_virt_request_full_gpu(adev, true); 1491 if (r) 1492 return -EAGAIN; 1493 } 1494 1495 adev->powerplay.pp_feature = amdgpu_pp_feature_mask; 1496 1497 for (i = 0; i < adev->num_ip_blocks; i++) { 1498 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 1499 DRM_ERROR("disabled ip block: %d <%s>\n", 1500 i, adev->ip_blocks[i].version->funcs->name); 1501 adev->ip_blocks[i].status.valid = false; 1502 } else { 1503 if (adev->ip_blocks[i].version->funcs->early_init) { 1504 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 1505 if (r == -ENOENT) { 1506 adev->ip_blocks[i].status.valid = false; 1507 } else if (r) { 1508 DRM_ERROR("early_init of IP block <%s> failed %d\n", 1509 adev->ip_blocks[i].version->funcs->name, r); 1510 return r; 1511 } else { 1512 adev->ip_blocks[i].status.valid = true; 1513 } 1514 } else { 1515 adev->ip_blocks[i].status.valid = true; 1516 } 1517 } 1518 } 1519 1520 adev->cg_flags &= amdgpu_cg_mask; 1521 adev->pg_flags &= amdgpu_pg_mask; 1522 1523 return 0; 1524 } 1525 1526 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 1527 { 1528 int i, r; 1529 1530 for (i = 0; i < adev->num_ip_blocks; i++) { 1531 if (!adev->ip_blocks[i].status.sw) 1532 continue; 1533 if (adev->ip_blocks[i].status.hw) 1534 continue; 1535 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 1536 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 1537 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1538 if (r) { 1539 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1540 adev->ip_blocks[i].version->funcs->name, r); 1541 return r; 1542 } 1543 adev->ip_blocks[i].status.hw = true; 1544 } 1545 } 1546 1547 return 0; 
1548 } 1549 1550 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 1551 { 1552 int i, r; 1553 1554 for (i = 0; i < adev->num_ip_blocks; i++) { 1555 if (!adev->ip_blocks[i].status.sw) 1556 continue; 1557 if (adev->ip_blocks[i].status.hw) 1558 continue; 1559 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1560 if (r) { 1561 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1562 adev->ip_blocks[i].version->funcs->name, r); 1563 return r; 1564 } 1565 adev->ip_blocks[i].status.hw = true; 1566 } 1567 1568 return 0; 1569 } 1570 1571 static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 1572 { 1573 int r = 0; 1574 int i; 1575 1576 if (adev->asic_type >= CHIP_VEGA10) { 1577 for (i = 0; i < adev->num_ip_blocks; i++) { 1578 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 1579 if (adev->in_gpu_reset || adev->in_suspend) { 1580 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) 1581 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ 1582 r = adev->ip_blocks[i].version->funcs->resume(adev); 1583 if (r) { 1584 DRM_ERROR("resume of IP block <%s> failed %d\n", 1585 adev->ip_blocks[i].version->funcs->name, r); 1586 return r; 1587 } 1588 } else { 1589 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1590 if (r) { 1591 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 1592 adev->ip_blocks[i].version->funcs->name, r); 1593 return r; 1594 } 1595 } 1596 adev->ip_blocks[i].status.hw = true; 1597 } 1598 } 1599 } 1600 1601 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { 1602 r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); 1603 if (r) { 1604 pr_err("firmware loading failed\n"); 1605 return r; 1606 } 1607 } 1608 1609 return 0; 1610 } 1611 1612 /** 1613 * amdgpu_device_ip_init - run init for hardware IPs 1614 * 1615 * @adev: amdgpu_device pointer 1616 * 1617 * Main initialization pass for hardware IPs. The list of all the hardware 1618 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 1619 * are run. sw_init initializes the software state associated with each IP 1620 * and hw_init initializes the hardware associated with each IP. 1621 * Returns 0 on success, negative error code on failure. 
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * Late initialization pass enabling clockgating for hardware IPs.
 * At fini or suspend time, the same pass is used to disable clockgating.
 * Returns 0 on success, negative error code on failure.
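 *
 * Note: when gating, the IP list is walked front to back; when ungating, it
 * is walked in reverse order (see the index calculation in the loop below).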
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IP blocks have been initialized or something that needs
 * to happen late in the init process.
 * Returns 0 on success, negative error code on failure.
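 *
 * Note: late init also schedules adev->late_init_work, which runs the
 * deferred IB ring tests (and multi-GPU fan boost setup) a couple of seconds
 * later; see amdgpu_device_ip_late_init_func_handler() below.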
1809 */ 1810 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 1811 { 1812 int i = 0, r; 1813 1814 for (i = 0; i < adev->num_ip_blocks; i++) { 1815 if (!adev->ip_blocks[i].status.hw) 1816 continue; 1817 if (adev->ip_blocks[i].version->funcs->late_init) { 1818 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 1819 if (r) { 1820 DRM_ERROR("late_init of IP block <%s> failed %d\n", 1821 adev->ip_blocks[i].version->funcs->name, r); 1822 return r; 1823 } 1824 } 1825 adev->ip_blocks[i].status.late_initialized = true; 1826 } 1827 1828 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 1829 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 1830 1831 queue_delayed_work(system_wq, &adev->late_init_work, 1832 msecs_to_jiffies(AMDGPU_RESUME_MS)); 1833 1834 amdgpu_device_fill_reset_magic(adev); 1835 1836 return 0; 1837 } 1838 1839 /** 1840 * amdgpu_device_ip_fini - run fini for hardware IPs 1841 * 1842 * @adev: amdgpu_device pointer 1843 * 1844 * Main teardown pass for hardware IPs. The list of all the hardware 1845 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 1846 * are run. hw_fini tears down the hardware associated with each IP 1847 * and sw_fini tears down any software state associated with each IP. 1848 * Returns 0 on success, negative error code on failure. 1849 */ 1850 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 1851 { 1852 int i, r; 1853 1854 amdgpu_amdkfd_device_fini(adev); 1855 1856 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 1857 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 1858 1859 /* need to disable SMC first */ 1860 for (i = 0; i < adev->num_ip_blocks; i++) { 1861 if (!adev->ip_blocks[i].status.hw) 1862 continue; 1863 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 1864 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 1865 /* XXX handle errors */ 1866 if (r) { 1867 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 1868 adev->ip_blocks[i].version->funcs->name, r); 1869 } 1870 adev->ip_blocks[i].status.hw = false; 1871 break; 1872 } 1873 } 1874 1875 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1876 if (!adev->ip_blocks[i].status.hw) 1877 continue; 1878 1879 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 1880 /* XXX handle errors */ 1881 if (r) { 1882 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 1883 adev->ip_blocks[i].version->funcs->name, r); 1884 } 1885 1886 adev->ip_blocks[i].status.hw = false; 1887 } 1888 1889 1890 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1891 if (!adev->ip_blocks[i].status.sw) 1892 continue; 1893 1894 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 1895 amdgpu_ucode_free_bo(adev); 1896 amdgpu_free_static_csa(&adev->virt.csa_obj); 1897 amdgpu_device_wb_fini(adev); 1898 amdgpu_device_vram_scratch_fini(adev); 1899 } 1900 1901 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 1902 /* XXX handle errors */ 1903 if (r) { 1904 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 1905 adev->ip_blocks[i].version->funcs->name, r); 1906 } 1907 adev->ip_blocks[i].status.sw = false; 1908 adev->ip_blocks[i].status.valid = false; 1909 } 1910 1911 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1912 if (!adev->ip_blocks[i].status.late_initialized) 1913 continue; 1914 if (adev->ip_blocks[i].version->funcs->late_fini) 1915 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 1916 adev->ip_blocks[i].status.late_initialized = false; 1917 } 1918 1919 if (amdgpu_sriov_vf(adev)) 1920 if 
(amdgpu_virt_release_full_gpu(adev, false)) 1921 DRM_ERROR("failed to release exclusive mode on fini\n"); 1922 1923 return 0; 1924 } 1925 1926 static int amdgpu_device_enable_mgpu_fan_boost(void) 1927 { 1928 struct amdgpu_gpu_instance *gpu_ins; 1929 struct amdgpu_device *adev; 1930 int i, ret = 0; 1931 1932 mutex_lock(&mgpu_info.mutex); 1933 1934 /* 1935 * MGPU fan boost feature should be enabled 1936 * only when there are two or more dGPUs in 1937 * the system 1938 */ 1939 if (mgpu_info.num_dgpu < 2) 1940 goto out; 1941 1942 for (i = 0; i < mgpu_info.num_dgpu; i++) { 1943 gpu_ins = &(mgpu_info.gpu_ins[i]); 1944 adev = gpu_ins->adev; 1945 if (!(adev->flags & AMD_IS_APU) && 1946 !gpu_ins->mgpu_fan_enabled && 1947 adev->powerplay.pp_funcs && 1948 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { 1949 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 1950 if (ret) 1951 break; 1952 1953 gpu_ins->mgpu_fan_enabled = 1; 1954 } 1955 } 1956 1957 out: 1958 mutex_unlock(&mgpu_info.mutex); 1959 1960 return ret; 1961 } 1962 1963 /** 1964 * amdgpu_device_ip_late_init_func_handler - work handler for ib test 1965 * 1966 * @work: work_struct. 1967 */ 1968 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) 1969 { 1970 struct amdgpu_device *adev = 1971 container_of(work, struct amdgpu_device, late_init_work.work); 1972 int r; 1973 1974 r = amdgpu_ib_ring_tests(adev); 1975 if (r) 1976 DRM_ERROR("ib ring test failed (%d).\n", r); 1977 1978 r = amdgpu_device_enable_mgpu_fan_boost(); 1979 if (r) 1980 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 1981 } 1982 1983 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 1984 { 1985 struct amdgpu_device *adev = 1986 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 1987 1988 mutex_lock(&adev->gfx.gfx_off_mutex); 1989 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 1990 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 1991 adev->gfx.gfx_off_state = true; 1992 } 1993 mutex_unlock(&adev->gfx.gfx_off_mutex); 1994 } 1995 1996 /** 1997 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 1998 * 1999 * @adev: amdgpu_device pointer 2000 * 2001 * Main suspend function for hardware IPs. The list of all the hardware 2002 * IPs that make up the asic is walked, clockgating is disabled and the 2003 * suspend callbacks are run. suspend puts the hardware and software state 2004 * in each IP into a state suitable for suspend. 2005 * Returns 0 on success, negative error code on failure. 2006 */ 2007 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2008 { 2009 int i, r; 2010 2011 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2012 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2013 2014 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2015 if (!adev->ip_blocks[i].status.valid) 2016 continue; 2017 /* displays are handled separately */ 2018 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2019 /* XXX handle errors */ 2020 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2021 /* XXX handle errors */ 2022 if (r) { 2023 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2024 adev->ip_blocks[i].version->funcs->name, r); 2025 } 2026 } 2027 } 2028 2029 return 0; 2030 } 2031 2032 /** 2033 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2034 * 2035 * @adev: amdgpu_device pointer 2036 * 2037 * Main suspend function for hardware IPs. 
The list of all the hardware 2038 * IPs that make up the asic is walked, clockgating is disabled and the 2039 * suspend callbacks are run. suspend puts the hardware and software state 2040 * in each IP into a state suitable for suspend. 2041 * Returns 0 on success, negative error code on failure. 2042 */ 2043 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2044 { 2045 int i, r; 2046 2047 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2048 if (!adev->ip_blocks[i].status.valid) 2049 continue; 2050 /* displays are handled in phase1 */ 2051 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2052 continue; 2053 /* XXX handle errors */ 2054 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2055 /* XXX handle errors */ 2056 if (r) { 2057 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2058 adev->ip_blocks[i].version->funcs->name, r); 2059 } 2060 } 2061 2062 return 0; 2063 } 2064 2065 /** 2066 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2067 * 2068 * @adev: amdgpu_device pointer 2069 * 2070 * Main suspend function for hardware IPs. The list of all the hardware 2071 * IPs that make up the asic is walked, clockgating is disabled and the 2072 * suspend callbacks are run. suspend puts the hardware and software state 2073 * in each IP into a state suitable for suspend. 2074 * Returns 0 on success, negative error code on failure. 2075 */ 2076 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2077 { 2078 int r; 2079 2080 if (amdgpu_sriov_vf(adev)) 2081 amdgpu_virt_request_full_gpu(adev, false); 2082 2083 r = amdgpu_device_ip_suspend_phase1(adev); 2084 if (r) 2085 return r; 2086 r = amdgpu_device_ip_suspend_phase2(adev); 2087 2088 if (amdgpu_sriov_vf(adev)) 2089 amdgpu_virt_release_full_gpu(adev, false); 2090 2091 return r; 2092 } 2093 2094 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2095 { 2096 int i, r; 2097 2098 static enum amd_ip_block_type ip_order[] = { 2099 AMD_IP_BLOCK_TYPE_GMC, 2100 AMD_IP_BLOCK_TYPE_COMMON, 2101 AMD_IP_BLOCK_TYPE_PSP, 2102 AMD_IP_BLOCK_TYPE_IH, 2103 }; 2104 2105 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2106 int j; 2107 struct amdgpu_ip_block *block; 2108 2109 for (j = 0; j < adev->num_ip_blocks; j++) { 2110 block = &adev->ip_blocks[j]; 2111 2112 if (block->version->type != ip_order[i] || 2113 !block->status.valid) 2114 continue; 2115 2116 r = block->version->funcs->hw_init(adev); 2117 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2118 if (r) 2119 return r; 2120 } 2121 } 2122 2123 return 0; 2124 } 2125 2126 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2127 { 2128 int i, r; 2129 2130 static enum amd_ip_block_type ip_order[] = { 2131 AMD_IP_BLOCK_TYPE_SMC, 2132 AMD_IP_BLOCK_TYPE_DCE, 2133 AMD_IP_BLOCK_TYPE_GFX, 2134 AMD_IP_BLOCK_TYPE_SDMA, 2135 AMD_IP_BLOCK_TYPE_UVD, 2136 AMD_IP_BLOCK_TYPE_VCE 2137 }; 2138 2139 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2140 int j; 2141 struct amdgpu_ip_block *block; 2142 2143 for (j = 0; j < adev->num_ip_blocks; j++) { 2144 block = &adev->ip_blocks[j]; 2145 2146 if (block->version->type != ip_order[i] || 2147 !block->status.valid) 2148 continue; 2149 2150 r = block->version->funcs->hw_init(adev); 2151 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2152 if (r) 2153 return r; 2154 } 2155 } 2156 2157 return 0; 2158 } 2159 2160 /** 2161 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2162 * 2163 * @adev: amdgpu_device pointer 2164 * 2165 
* First resume function for hardware IPs. The list of all the hardware
2166 * IPs that make up the asic is walked and the resume callbacks are run for
2167 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2168 * after a suspend and updates the software state as necessary. This
2169 * function is also used for restoring the GPU after a GPU reset.
2170 * Returns 0 on success, negative error code on failure.
2171 */
2172 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2173 { 2174 int i, r; 2175
2176 for (i = 0; i < adev->num_ip_blocks; i++) {
2177 if (!adev->ip_blocks[i].status.valid)
2178 continue;
2179 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2180 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2182 r = adev->ip_blocks[i].version->funcs->resume(adev);
2183 if (r) {
2184 DRM_ERROR("resume of IP block <%s> failed %d\n",
2185 adev->ip_blocks[i].version->funcs->name, r);
2186 return r;
2187 } 2188 } 2189 } 2190
2191 return 0;
2192 } 2193
2194 /**
2195 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2196 *
2197 * @adev: amdgpu_device pointer
2198 *
2199 * Second resume function for hardware IPs. The list of all the hardware
2200 * IPs that make up the asic is walked and the resume callbacks are run for
2201 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2202 * functional state after a suspend and updates the software state as
2203 * necessary. This function is also used for restoring the GPU after a GPU
2204 * reset.
2205 * Returns 0 on success, negative error code on failure.
2206 */
2207 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2208 { 2209 int i, r; 2210
2211 for (i = 0; i < adev->num_ip_blocks; i++) {
2212 if (!adev->ip_blocks[i].status.valid)
2213 continue;
2214 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2216 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2217 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2218 continue;
2219 r = adev->ip_blocks[i].version->funcs->resume(adev);
2220 if (r) {
2221 DRM_ERROR("resume of IP block <%s> failed %d\n",
2222 adev->ip_blocks[i].version->funcs->name, r);
2223 return r;
2224 } 2225 } 2226
2227 return 0;
2228 } 2229
2230 /**
2231 * amdgpu_device_ip_resume - run resume for hardware IPs
2232 *
2233 * @adev: amdgpu_device pointer
2234 *
2235 * Main resume function for hardware IPs. The hardware IPs
2236 * are split into two resume functions because they are
2237 * also used in recovering from a GPU reset and some additional
2238 * steps need to be taken between them. In this case (S3/S4) they are
2239 * run sequentially.
2240 * Returns 0 on success, negative error code on failure.
2241 */
2242 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2243 { 2244 int r; 2245
2246 r = amdgpu_device_ip_resume_phase1(adev);
2247 if (r)
2248 return r; 2249
2250 r = amdgpu_device_fw_loading(adev);
2251 if (r)
2252 return r; 2253
2254 r = amdgpu_device_ip_resume_phase2(adev); 2255
2256 return r;
2257 } 2258
2259 /**
2260 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2261 *
2262 * @adev: amdgpu_device pointer
2263 *
2264 * Query the VBIOS data tables to determine if the board supports SR-IOV.
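 *
 * On success this sets AMDGPU_SRIOV_CAPS_SRIOV_VBIOS in adev->virt.caps,
 * so later code can simply test that bit, e.g. (illustrative sketch):
 *
 *	if (adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)
 *		dev_info(adev->dev, "SR-IOV capable VBIOS detected\n");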
2265 */
2266 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2267 {
2268 if (amdgpu_sriov_vf(adev)) {
2269 if (adev->is_atom_fw) {
2270 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2271 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2272 } else {
2273 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2274 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2275 } 2276
2277 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2278 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2279 } 2280 } 2281
2282 /**
2283 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2284 *
2285 * @asic_type: AMD asic type
2286 *
2287 * Check if there is DC (new modesetting infrastructure) support for an asic.
2288 * Returns true if DC has support, false if not.
2289 */
2290 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2291 {
2292 switch (asic_type) {
2293 #if defined(CONFIG_DRM_AMD_DC)
2294 case CHIP_BONAIRE:
2295 case CHIP_KAVERI:
2296 case CHIP_KABINI:
2297 case CHIP_MULLINS:
2298 /*
2299 * We have systems in the wild with these ASICs that require
2300 * LVDS and VGA support which is not supported with DC.
2301 *
2302 * Fallback to the non-DC driver here by default so as not to
2303 * cause regressions.
2304 */
2305 return amdgpu_dc > 0;
2306 case CHIP_HAWAII:
2307 case CHIP_CARRIZO:
2308 case CHIP_STONEY:
2309 case CHIP_POLARIS10:
2310 case CHIP_POLARIS11:
2311 case CHIP_POLARIS12:
2312 case CHIP_VEGAM:
2313 case CHIP_TONGA:
2314 case CHIP_FIJI:
2315 case CHIP_VEGA10:
2316 case CHIP_VEGA12:
2317 case CHIP_VEGA20:
2318 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2319 case CHIP_RAVEN:
2320 #endif
2321 return amdgpu_dc != 0;
2322 #endif
2323 default:
2324 return false;
2325 } 2326 } 2327
2328 /**
2329 * amdgpu_device_has_dc_support - check if dc is supported
2330 *
2331 * @adev: amdgpu_device pointer
2332 *
2333 * Returns true for supported, false for not supported
2334 */
2335 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2336 {
2337 if (amdgpu_sriov_vf(adev))
2338 return false; 2339
2340 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2341 } 2342
2343 /**
2344 * amdgpu_device_init - initialize the driver
2345 *
2346 * @adev: amdgpu_device pointer
2347 * @ddev: drm dev pointer
2348 * @pdev: pci dev pointer
2349 * @flags: driver flags
2350 *
2351 * Initializes the driver info and hw (all asics).
2352 * Returns 0 for success or an error on failure.
2353 * Called at driver startup.
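 *
 * Call-site sketch (modelled on amdgpu_driver_load_kms() in amdgpu_kms.c;
 * flag handling is simplified here and shown for illustration only):
 *
 *	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 *	if (!adev)
 *		return -ENOMEM;
 *	r = amdgpu_device_init(adev, dev, dev->pdev, flags);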
2354 */ 2355 int amdgpu_device_init(struct amdgpu_device *adev, 2356 struct drm_device *ddev, 2357 struct pci_dev *pdev, 2358 uint32_t flags) 2359 { 2360 int r, i; 2361 bool runtime = false; 2362 u32 max_MBps; 2363 2364 adev->shutdown = false; 2365 adev->dev = &pdev->dev; 2366 adev->ddev = ddev; 2367 adev->pdev = pdev; 2368 adev->flags = flags; 2369 adev->asic_type = flags & AMD_ASIC_MASK; 2370 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2371 if (amdgpu_emu_mode == 1) 2372 adev->usec_timeout *= 2; 2373 adev->gmc.gart_size = 512 * 1024 * 1024; 2374 adev->accel_working = false; 2375 adev->num_rings = 0; 2376 adev->mman.buffer_funcs = NULL; 2377 adev->mman.buffer_funcs_ring = NULL; 2378 adev->vm_manager.vm_pte_funcs = NULL; 2379 adev->vm_manager.vm_pte_num_rqs = 0; 2380 adev->gmc.gmc_funcs = NULL; 2381 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2382 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2383 2384 adev->smc_rreg = &amdgpu_invalid_rreg; 2385 adev->smc_wreg = &amdgpu_invalid_wreg; 2386 adev->pcie_rreg = &amdgpu_invalid_rreg; 2387 adev->pcie_wreg = &amdgpu_invalid_wreg; 2388 adev->pciep_rreg = &amdgpu_invalid_rreg; 2389 adev->pciep_wreg = &amdgpu_invalid_wreg; 2390 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2391 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2392 adev->didt_rreg = &amdgpu_invalid_rreg; 2393 adev->didt_wreg = &amdgpu_invalid_wreg; 2394 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2395 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2396 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2397 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2398 2399 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2400 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2401 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2402 2403 /* mutex initialization are all done here so we 2404 * can recall function without having locking issues */ 2405 atomic_set(&adev->irq.ih.lock, 0); 2406 mutex_init(&adev->firmware.mutex); 2407 mutex_init(&adev->pm.mutex); 2408 mutex_init(&adev->gfx.gpu_clock_mutex); 2409 mutex_init(&adev->srbm_mutex); 2410 mutex_init(&adev->gfx.pipe_reserve_mutex); 2411 mutex_init(&adev->gfx.gfx_off_mutex); 2412 mutex_init(&adev->grbm_idx_mutex); 2413 mutex_init(&adev->mn_lock); 2414 mutex_init(&adev->virt.vf_errors.lock); 2415 hash_init(adev->mn_hash); 2416 mutex_init(&adev->lock_reset); 2417 2418 amdgpu_device_check_arguments(adev); 2419 2420 spin_lock_init(&adev->mmio_idx_lock); 2421 spin_lock_init(&adev->smc_idx_lock); 2422 spin_lock_init(&adev->pcie_idx_lock); 2423 spin_lock_init(&adev->uvd_ctx_idx_lock); 2424 spin_lock_init(&adev->didt_idx_lock); 2425 spin_lock_init(&adev->gc_cac_idx_lock); 2426 spin_lock_init(&adev->se_cac_idx_lock); 2427 spin_lock_init(&adev->audio_endpt_idx_lock); 2428 spin_lock_init(&adev->mm_stats.lock); 2429 2430 INIT_LIST_HEAD(&adev->shadow_list); 2431 mutex_init(&adev->shadow_list_lock); 2432 2433 INIT_LIST_HEAD(&adev->ring_lru_list); 2434 spin_lock_init(&adev->ring_lru_list_lock); 2435 2436 INIT_DELAYED_WORK(&adev->late_init_work, 2437 amdgpu_device_ip_late_init_func_handler); 2438 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2439 amdgpu_device_delay_enable_gfx_off); 2440 2441 adev->gfx.gfx_off_req_count = 1; 2442 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 2443 2444 /* Registers mapping */ 2445 /* TODO: block userspace mapping of io register */ 2446 if (adev->asic_type >= CHIP_BONAIRE) { 2447 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 2448 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 2449 } else { 2450 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 2451 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 2452 } 2453 2454 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 2455 if (adev->rmmio == NULL) { 2456 return -ENOMEM; 2457 } 2458 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2459 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2460 2461 /* doorbell bar mapping */ 2462 amdgpu_device_doorbell_init(adev); 2463 2464 /* io port mapping */ 2465 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2466 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 2467 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 2468 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 2469 break; 2470 } 2471 } 2472 if (adev->rio_mem == NULL) 2473 DRM_INFO("PCI I/O BAR is not found.\n"); 2474 2475 amdgpu_device_get_pcie_info(adev); 2476 2477 /* early init functions */ 2478 r = amdgpu_device_ip_early_init(adev); 2479 if (r) 2480 return r; 2481 2482 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 2483 /* this will fail for cards that aren't VGA class devices, just 2484 * ignore it */ 2485 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 2486 2487 if (amdgpu_device_is_px(ddev)) 2488 runtime = true; 2489 if (!pci_is_thunderbolt_attached(adev->pdev)) 2490 vga_switcheroo_register_client(adev->pdev, 2491 &amdgpu_switcheroo_ops, runtime); 2492 if (runtime) 2493 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2494 2495 if (amdgpu_emu_mode == 1) { 2496 /* post the asic on emulation mode */ 2497 emu_soc_asic_init(adev); 2498 goto fence_driver_init; 2499 } 2500 2501 /* Read BIOS */ 2502 if (!amdgpu_get_bios(adev)) { 2503 r = -EINVAL; 2504 goto failed; 2505 } 2506 2507 r = amdgpu_atombios_init(adev); 2508 if (r) { 2509 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2510 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2511 goto failed; 2512 } 2513 2514 /* detect if we are with an SRIOV vbios */ 2515 amdgpu_device_detect_sriov_bios(adev); 2516 2517 /* Post card if necessary */ 2518 if (amdgpu_device_need_post(adev)) { 2519 if (!adev->bios) { 2520 dev_err(adev->dev, "no vBIOS found\n"); 2521 r = -EINVAL; 2522 goto failed; 2523 } 2524 DRM_INFO("GPU posting now...\n"); 2525 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2526 if (r) { 2527 dev_err(adev->dev, "gpu post error!\n"); 2528 goto failed; 2529 } 2530 } 2531 2532 if (adev->is_atom_fw) { 2533 /* Initialize clocks */ 2534 r = amdgpu_atomfirmware_get_clock_info(adev); 2535 if (r) { 2536 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2537 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2538 goto failed; 2539 } 2540 } else { 2541 /* Initialize clocks */ 2542 r = amdgpu_atombios_get_clock_info(adev); 2543 if (r) { 2544 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2545 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2546 goto failed; 2547 } 2548 /* init i2c buses */ 2549 if (!amdgpu_device_has_dc_support(adev)) 2550 amdgpu_atombios_i2c_init(adev); 2551 } 2552 2553 fence_driver_init: 2554 /* Fence driver */ 2555 r = 
amdgpu_fence_driver_init(adev); 2556 if (r) { 2557 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2558 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2559 goto failed; 2560 } 2561 2562 /* init the mode config */ 2563 drm_mode_config_init(adev->ddev); 2564 2565 r = amdgpu_device_ip_init(adev); 2566 if (r) { 2567 /* failed in exclusive mode due to timeout */ 2568 if (amdgpu_sriov_vf(adev) && 2569 !amdgpu_sriov_runtime(adev) && 2570 amdgpu_virt_mmio_blocked(adev) && 2571 !amdgpu_virt_wait_reset(adev)) { 2572 dev_err(adev->dev, "VF exclusive mode timeout\n"); 2573 /* Don't send request since VF is inactive. */ 2574 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 2575 adev->virt.ops = NULL; 2576 r = -EAGAIN; 2577 goto failed; 2578 } 2579 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2580 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2581 goto failed; 2582 } 2583 2584 adev->accel_working = true; 2585 2586 amdgpu_vm_check_compute_bug(adev); 2587 2588 /* Initialize the buffer migration limit. */ 2589 if (amdgpu_moverate >= 0) 2590 max_MBps = amdgpu_moverate; 2591 else 2592 max_MBps = 8; /* Allow 8 MB/s. */ 2593 /* Get a log2 for easy divisions. */ 2594 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 2595 2596 r = amdgpu_ib_pool_init(adev); 2597 if (r) { 2598 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2599 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2600 goto failed; 2601 } 2602 2603 if (amdgpu_sriov_vf(adev)) 2604 amdgpu_virt_init_data_exchange(adev); 2605 2606 amdgpu_fbdev_init(adev); 2607 2608 r = amdgpu_pm_sysfs_init(adev); 2609 if (r) 2610 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2611 2612 r = amdgpu_debugfs_gem_init(adev); 2613 if (r) 2614 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2615 2616 r = amdgpu_debugfs_regs_init(adev); 2617 if (r) 2618 DRM_ERROR("registering register debugfs failed (%d).\n", r); 2619 2620 r = amdgpu_debugfs_firmware_init(adev); 2621 if (r) 2622 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2623 2624 r = amdgpu_debugfs_init(adev); 2625 if (r) 2626 DRM_ERROR("Creating debugfs files failed (%d).\n", r); 2627 2628 if ((amdgpu_testing & 1)) { 2629 if (adev->accel_working) 2630 amdgpu_test_moves(adev); 2631 else 2632 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 2633 } 2634 if (amdgpu_benchmarking) { 2635 if (adev->accel_working) 2636 amdgpu_benchmark(adev, amdgpu_benchmarking); 2637 else 2638 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 2639 } 2640 2641 /* enable clockgating, etc. after ib tests, etc. since some blocks require 2642 * explicit gating rather than handling it automatically. 2643 */ 2644 r = amdgpu_device_ip_late_init(adev); 2645 if (r) { 2646 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 2647 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2648 goto failed; 2649 } 2650 2651 return 0; 2652 2653 failed: 2654 amdgpu_vf_error_trans_all(adev); 2655 if (runtime) 2656 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2657 2658 return r; 2659 } 2660 2661 /** 2662 * amdgpu_device_fini - tear down the driver 2663 * 2664 * @adev: amdgpu_device pointer 2665 * 2666 * Tear down the driver info (all asics). 2667 * Called at driver shutdown. 
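 *
 * Call-site sketch (mirrors the teardown order used by
 * amdgpu_driver_unload_kms() in amdgpu_kms.c; illustrative only):
 *
 *	amdgpu_device_fini(adev);
 *	kfree(adev);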
2668 */ 2669 void amdgpu_device_fini(struct amdgpu_device *adev) 2670 { 2671 int r; 2672 2673 DRM_INFO("amdgpu: finishing device.\n"); 2674 adev->shutdown = true; 2675 /* disable all interrupts */ 2676 amdgpu_irq_disable_all(adev); 2677 if (adev->mode_info.mode_config_initialized){ 2678 if (!amdgpu_device_has_dc_support(adev)) 2679 drm_crtc_force_disable_all(adev->ddev); 2680 else 2681 drm_atomic_helper_shutdown(adev->ddev); 2682 } 2683 amdgpu_ib_pool_fini(adev); 2684 amdgpu_fence_driver_fini(adev); 2685 amdgpu_pm_sysfs_fini(adev); 2686 amdgpu_fbdev_fini(adev); 2687 r = amdgpu_device_ip_fini(adev); 2688 if (adev->firmware.gpu_info_fw) { 2689 release_firmware(adev->firmware.gpu_info_fw); 2690 adev->firmware.gpu_info_fw = NULL; 2691 } 2692 adev->accel_working = false; 2693 cancel_delayed_work_sync(&adev->late_init_work); 2694 /* free i2c buses */ 2695 if (!amdgpu_device_has_dc_support(adev)) 2696 amdgpu_i2c_fini(adev); 2697 2698 if (amdgpu_emu_mode != 1) 2699 amdgpu_atombios_fini(adev); 2700 2701 kfree(adev->bios); 2702 adev->bios = NULL; 2703 if (!pci_is_thunderbolt_attached(adev->pdev)) 2704 vga_switcheroo_unregister_client(adev->pdev); 2705 if (adev->flags & AMD_IS_PX) 2706 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2707 vga_client_register(adev->pdev, NULL, NULL, NULL); 2708 if (adev->rio_mem) 2709 pci_iounmap(adev->pdev, adev->rio_mem); 2710 adev->rio_mem = NULL; 2711 iounmap(adev->rmmio); 2712 adev->rmmio = NULL; 2713 amdgpu_device_doorbell_fini(adev); 2714 amdgpu_debugfs_regs_cleanup(adev); 2715 } 2716 2717 2718 /* 2719 * Suspend & resume. 2720 */ 2721 /** 2722 * amdgpu_device_suspend - initiate device suspend 2723 * 2724 * @dev: drm dev pointer 2725 * @suspend: suspend state 2726 * @fbcon : notify the fbdev of suspend 2727 * 2728 * Puts the hw in the suspend state (all asics). 2729 * Returns 0 for success or an error on failure. 2730 * Called at driver suspend. 
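 *
 * Usage sketch (the system sleep path in amdgpu_drv.c calls this from its
 * PM callbacks; the argument values shown are illustrative):
 *
 *	return amdgpu_device_suspend(drm_dev, true, true);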
2731 */ 2732 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) 2733 { 2734 struct amdgpu_device *adev; 2735 struct drm_crtc *crtc; 2736 struct drm_connector *connector; 2737 int r; 2738 2739 if (dev == NULL || dev->dev_private == NULL) { 2740 return -ENODEV; 2741 } 2742 2743 adev = dev->dev_private; 2744 2745 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2746 return 0; 2747 2748 adev->in_suspend = true; 2749 drm_kms_helper_poll_disable(dev); 2750 2751 if (fbcon) 2752 amdgpu_fbdev_set_suspend(adev, 1); 2753 2754 cancel_delayed_work_sync(&adev->late_init_work); 2755 2756 if (!amdgpu_device_has_dc_support(adev)) { 2757 /* turn off display hw */ 2758 drm_modeset_lock_all(dev); 2759 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2760 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 2761 } 2762 drm_modeset_unlock_all(dev); 2763 /* unpin the front buffers and cursors */ 2764 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2765 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2766 struct drm_framebuffer *fb = crtc->primary->fb; 2767 struct amdgpu_bo *robj; 2768 2769 if (amdgpu_crtc->cursor_bo) { 2770 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2771 r = amdgpu_bo_reserve(aobj, true); 2772 if (r == 0) { 2773 amdgpu_bo_unpin(aobj); 2774 amdgpu_bo_unreserve(aobj); 2775 } 2776 } 2777 2778 if (fb == NULL || fb->obj[0] == NULL) { 2779 continue; 2780 } 2781 robj = gem_to_amdgpu_bo(fb->obj[0]); 2782 /* don't unpin kernel fb objects */ 2783 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2784 r = amdgpu_bo_reserve(robj, true); 2785 if (r == 0) { 2786 amdgpu_bo_unpin(robj); 2787 amdgpu_bo_unreserve(robj); 2788 } 2789 } 2790 } 2791 } 2792 2793 amdgpu_amdkfd_suspend(adev); 2794 2795 r = amdgpu_device_ip_suspend_phase1(adev); 2796 2797 /* evict vram memory */ 2798 amdgpu_bo_evict_vram(adev); 2799 2800 amdgpu_fence_driver_suspend(adev); 2801 2802 r = amdgpu_device_ip_suspend_phase2(adev); 2803 2804 /* evict remaining vram memory 2805 * This second call to evict vram is to evict the gart page table 2806 * using the CPU. 2807 */ 2808 amdgpu_bo_evict_vram(adev); 2809 2810 pci_save_state(dev->pdev); 2811 if (suspend) { 2812 /* Shut down the device */ 2813 pci_disable_device(dev->pdev); 2814 pci_set_power_state(dev->pdev, PCI_D3hot); 2815 } else { 2816 r = amdgpu_asic_reset(adev); 2817 if (r) 2818 DRM_ERROR("amdgpu asic reset failed\n"); 2819 } 2820 2821 return 0; 2822 } 2823 2824 /** 2825 * amdgpu_device_resume - initiate device resume 2826 * 2827 * @dev: drm dev pointer 2828 * @resume: resume state 2829 * @fbcon : notify the fbdev of resume 2830 * 2831 * Bring the hw back to operating state (all asics). 2832 * Returns 0 for success or an error on failure. 2833 * Called at driver resume. 
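 *
 * Usage sketch (the matching resume PM callback in amdgpu_drv.c does the
 * equivalent of the following; shown for illustration only):
 *
 *	return amdgpu_device_resume(drm_dev, true, true);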
2834 */ 2835 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) 2836 { 2837 struct drm_connector *connector; 2838 struct amdgpu_device *adev = dev->dev_private; 2839 struct drm_crtc *crtc; 2840 int r = 0; 2841 2842 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2843 return 0; 2844 2845 if (resume) { 2846 pci_set_power_state(dev->pdev, PCI_D0); 2847 pci_restore_state(dev->pdev); 2848 r = pci_enable_device(dev->pdev); 2849 if (r) 2850 return r; 2851 } 2852 2853 /* post card */ 2854 if (amdgpu_device_need_post(adev)) { 2855 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2856 if (r) 2857 DRM_ERROR("amdgpu asic init failed\n"); 2858 } 2859 2860 r = amdgpu_device_ip_resume(adev); 2861 if (r) { 2862 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 2863 return r; 2864 } 2865 amdgpu_fence_driver_resume(adev); 2866 2867 2868 r = amdgpu_device_ip_late_init(adev); 2869 if (r) 2870 return r; 2871 2872 if (!amdgpu_device_has_dc_support(adev)) { 2873 /* pin cursors */ 2874 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2875 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2876 2877 if (amdgpu_crtc->cursor_bo) { 2878 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2879 r = amdgpu_bo_reserve(aobj, true); 2880 if (r == 0) { 2881 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2882 if (r != 0) 2883 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 2884 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 2885 amdgpu_bo_unreserve(aobj); 2886 } 2887 } 2888 } 2889 } 2890 r = amdgpu_amdkfd_resume(adev); 2891 if (r) 2892 return r; 2893 2894 /* Make sure IB tests flushed */ 2895 flush_delayed_work(&adev->late_init_work); 2896 2897 /* blat the mode back in */ 2898 if (fbcon) { 2899 if (!amdgpu_device_has_dc_support(adev)) { 2900 /* pre DCE11 */ 2901 drm_helper_resume_force_mode(dev); 2902 2903 /* turn on display hw */ 2904 drm_modeset_lock_all(dev); 2905 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2906 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 2907 } 2908 drm_modeset_unlock_all(dev); 2909 } 2910 amdgpu_fbdev_set_suspend(adev, 0); 2911 } 2912 2913 drm_kms_helper_poll_enable(dev); 2914 2915 /* 2916 * Most of the connector probing functions try to acquire runtime pm 2917 * refs to ensure that the GPU is powered on when connector polling is 2918 * performed. Since we're calling this from a runtime PM callback, 2919 * trying to acquire rpm refs will cause us to deadlock. 2920 * 2921 * Since we're guaranteed to be holding the rpm lock, it's safe to 2922 * temporarily disable the rpm helpers so this doesn't deadlock us. 2923 */ 2924 #ifdef CONFIG_PM 2925 dev->dev->power.disable_depth++; 2926 #endif 2927 if (!amdgpu_device_has_dc_support(adev)) 2928 drm_helper_hpd_irq_event(dev); 2929 else 2930 drm_kms_helper_hotplug_event(dev); 2931 #ifdef CONFIG_PM 2932 dev->dev->power.disable_depth--; 2933 #endif 2934 adev->in_suspend = false; 2935 2936 return 0; 2937 } 2938 2939 /** 2940 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 2941 * 2942 * @adev: amdgpu_device pointer 2943 * 2944 * The list of all the hardware IPs that make up the asic is walked and 2945 * the check_soft_reset callbacks are run. check_soft_reset determines 2946 * if the asic is still hung or not. 2947 * Returns true if any of the IPs are still in a hung state, false if not. 
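 *
 * Usage sketch (this mirrors amdgpu_device_should_recover_gpu() later in
 * this file):
 *
 *	if (!amdgpu_device_ip_check_soft_reset(adev)) {
 *		DRM_INFO("Timeout, but no hardware hang detected.\n");
 *		return false;
 *	}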
2948 */ 2949 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 2950 { 2951 int i; 2952 bool asic_hang = false; 2953 2954 if (amdgpu_sriov_vf(adev)) 2955 return true; 2956 2957 if (amdgpu_asic_need_full_reset(adev)) 2958 return true; 2959 2960 for (i = 0; i < adev->num_ip_blocks; i++) { 2961 if (!adev->ip_blocks[i].status.valid) 2962 continue; 2963 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 2964 adev->ip_blocks[i].status.hang = 2965 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 2966 if (adev->ip_blocks[i].status.hang) { 2967 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 2968 asic_hang = true; 2969 } 2970 } 2971 return asic_hang; 2972 } 2973 2974 /** 2975 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 2976 * 2977 * @adev: amdgpu_device pointer 2978 * 2979 * The list of all the hardware IPs that make up the asic is walked and the 2980 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 2981 * handles any IP specific hardware or software state changes that are 2982 * necessary for a soft reset to succeed. 2983 * Returns 0 on success, negative error code on failure. 2984 */ 2985 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 2986 { 2987 int i, r = 0; 2988 2989 for (i = 0; i < adev->num_ip_blocks; i++) { 2990 if (!adev->ip_blocks[i].status.valid) 2991 continue; 2992 if (adev->ip_blocks[i].status.hang && 2993 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 2994 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 2995 if (r) 2996 return r; 2997 } 2998 } 2999 3000 return 0; 3001 } 3002 3003 /** 3004 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3005 * 3006 * @adev: amdgpu_device pointer 3007 * 3008 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3009 * reset is necessary to recover. 3010 * Returns true if a full asic reset is required, false if not. 3011 */ 3012 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3013 { 3014 int i; 3015 3016 if (amdgpu_asic_need_full_reset(adev)) 3017 return true; 3018 3019 for (i = 0; i < adev->num_ip_blocks; i++) { 3020 if (!adev->ip_blocks[i].status.valid) 3021 continue; 3022 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3023 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3024 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3025 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3026 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3027 if (adev->ip_blocks[i].status.hang) { 3028 DRM_INFO("Some block need full reset!\n"); 3029 return true; 3030 } 3031 } 3032 } 3033 return false; 3034 } 3035 3036 /** 3037 * amdgpu_device_ip_soft_reset - do a soft reset 3038 * 3039 * @adev: amdgpu_device pointer 3040 * 3041 * The list of all the hardware IPs that make up the asic is walked and the 3042 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3043 * IP specific hardware or software state changes that are necessary to soft 3044 * reset the IP. 3045 * Returns 0 on success, negative error code on failure. 
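 *
 * The soft reset path in amdgpu_device_reset() (later in this file) runs the
 * pre/soft/post helpers in sequence and falls back to a full reset when
 * needed, roughly:
 *
 *	amdgpu_device_ip_pre_soft_reset(adev);
 *	r = amdgpu_device_ip_soft_reset(adev);
 *	amdgpu_device_ip_post_soft_reset(adev);
 *	if (r || amdgpu_device_ip_check_soft_reset(adev))
 *		need_full_reset = true;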
3046 */ 3047 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3048 { 3049 int i, r = 0; 3050 3051 for (i = 0; i < adev->num_ip_blocks; i++) { 3052 if (!adev->ip_blocks[i].status.valid) 3053 continue; 3054 if (adev->ip_blocks[i].status.hang && 3055 adev->ip_blocks[i].version->funcs->soft_reset) { 3056 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3057 if (r) 3058 return r; 3059 } 3060 } 3061 3062 return 0; 3063 } 3064 3065 /** 3066 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3067 * 3068 * @adev: amdgpu_device pointer 3069 * 3070 * The list of all the hardware IPs that make up the asic is walked and the 3071 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3072 * handles any IP specific hardware or software state changes that are 3073 * necessary after the IP has been soft reset. 3074 * Returns 0 on success, negative error code on failure. 3075 */ 3076 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3077 { 3078 int i, r = 0; 3079 3080 for (i = 0; i < adev->num_ip_blocks; i++) { 3081 if (!adev->ip_blocks[i].status.valid) 3082 continue; 3083 if (adev->ip_blocks[i].status.hang && 3084 adev->ip_blocks[i].version->funcs->post_soft_reset) 3085 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3086 if (r) 3087 return r; 3088 } 3089 3090 return 0; 3091 } 3092 3093 /** 3094 * amdgpu_device_recover_vram - Recover some VRAM contents 3095 * 3096 * @adev: amdgpu_device pointer 3097 * 3098 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3099 * restore things like GPUVM page tables after a GPU reset where 3100 * the contents of VRAM might be lost. 3101 * 3102 * Returns: 3103 * 0 on success, negative error code on failure. 3104 */ 3105 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3106 { 3107 struct dma_fence *fence = NULL, *next = NULL; 3108 struct amdgpu_bo *shadow; 3109 long r = 1, tmo; 3110 3111 if (amdgpu_sriov_runtime(adev)) 3112 tmo = msecs_to_jiffies(8000); 3113 else 3114 tmo = msecs_to_jiffies(100); 3115 3116 DRM_INFO("recover vram bo from shadow start\n"); 3117 mutex_lock(&adev->shadow_list_lock); 3118 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3119 3120 /* No need to recover an evicted BO */ 3121 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3122 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3123 continue; 3124 3125 r = amdgpu_bo_restore_shadow(shadow, &next); 3126 if (r) 3127 break; 3128 3129 if (fence) { 3130 r = dma_fence_wait_timeout(fence, false, tmo); 3131 dma_fence_put(fence); 3132 fence = next; 3133 if (r <= 0) 3134 break; 3135 } else { 3136 fence = next; 3137 } 3138 } 3139 mutex_unlock(&adev->shadow_list_lock); 3140 3141 if (fence) 3142 tmo = dma_fence_wait_timeout(fence, false, tmo); 3143 dma_fence_put(fence); 3144 3145 if (r <= 0 || tmo <= 0) { 3146 DRM_ERROR("recover vram bo from shadow failed\n"); 3147 return -EIO; 3148 } 3149 3150 DRM_INFO("recover vram bo from shadow done\n"); 3151 return 0; 3152 } 3153 3154 /** 3155 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough 3156 * 3157 * @adev: amdgpu device pointer 3158 * 3159 * attempt to do soft-reset or full-reset and reinitialize Asic 3160 * return 0 means succeeded otherwise failed 3161 */ 3162 static int amdgpu_device_reset(struct amdgpu_device *adev) 3163 { 3164 bool need_full_reset, vram_lost = 0; 3165 int r; 3166 3167 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3168 3169 if (!need_full_reset) { 3170 
amdgpu_device_ip_pre_soft_reset(adev); 3171 r = amdgpu_device_ip_soft_reset(adev); 3172 amdgpu_device_ip_post_soft_reset(adev); 3173 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3174 DRM_INFO("soft reset failed, will fallback to full reset!\n"); 3175 need_full_reset = true; 3176 } 3177 } 3178 3179 if (need_full_reset) { 3180 r = amdgpu_device_ip_suspend(adev); 3181 3182 retry: 3183 r = amdgpu_asic_reset(adev); 3184 /* post card */ 3185 amdgpu_atom_asic_init(adev->mode_info.atom_context); 3186 3187 if (!r) { 3188 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); 3189 r = amdgpu_device_ip_resume_phase1(adev); 3190 if (r) 3191 goto out; 3192 3193 vram_lost = amdgpu_device_check_vram_lost(adev); 3194 if (vram_lost) { 3195 DRM_ERROR("VRAM is lost!\n"); 3196 atomic_inc(&adev->vram_lost_counter); 3197 } 3198 3199 r = amdgpu_gtt_mgr_recover( 3200 &adev->mman.bdev.man[TTM_PL_TT]); 3201 if (r) 3202 goto out; 3203 3204 r = amdgpu_device_fw_loading(adev); 3205 if (r) 3206 return r; 3207 3208 r = amdgpu_device_ip_resume_phase2(adev); 3209 if (r) 3210 goto out; 3211 3212 if (vram_lost) 3213 amdgpu_device_fill_reset_magic(adev); 3214 } 3215 } 3216 3217 out: 3218 if (!r) { 3219 amdgpu_irq_gpu_reset_resume_helper(adev); 3220 r = amdgpu_ib_ring_tests(adev); 3221 if (r) { 3222 dev_err(adev->dev, "ib ring test failed (%d).\n", r); 3223 r = amdgpu_device_ip_suspend(adev); 3224 need_full_reset = true; 3225 goto retry; 3226 } 3227 } 3228 3229 if (!r) 3230 r = amdgpu_device_recover_vram(adev); 3231 3232 return r; 3233 } 3234 3235 /** 3236 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3237 * 3238 * @adev: amdgpu device pointer 3239 * @from_hypervisor: request from hypervisor 3240 * 3241 * do VF FLR and reinitialize Asic 3242 * return 0 means succeeded otherwise failed 3243 */ 3244 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 3245 bool from_hypervisor) 3246 { 3247 int r; 3248 3249 if (from_hypervisor) 3250 r = amdgpu_virt_request_full_gpu(adev, true); 3251 else 3252 r = amdgpu_virt_reset_gpu(adev); 3253 if (r) 3254 return r; 3255 3256 /* Resume IP prior to SMC */ 3257 r = amdgpu_device_ip_reinit_early_sriov(adev); 3258 if (r) 3259 goto error; 3260 3261 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3262 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3263 3264 r = amdgpu_device_fw_loading(adev); 3265 if (r) 3266 return r; 3267 3268 /* now we are okay to resume SMC/CP/SDMA */ 3269 r = amdgpu_device_ip_reinit_late_sriov(adev); 3270 if (r) 3271 goto error; 3272 3273 amdgpu_irq_gpu_reset_resume_helper(adev); 3274 r = amdgpu_ib_ring_tests(adev); 3275 3276 error: 3277 amdgpu_virt_release_full_gpu(adev, true); 3278 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3279 atomic_inc(&adev->vram_lost_counter); 3280 r = amdgpu_device_recover_vram(adev); 3281 } 3282 3283 return r; 3284 } 3285 3286 /** 3287 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3288 * 3289 * @adev: amdgpu device pointer 3290 * 3291 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3292 * a hung GPU. 
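 *
 * Caller sketch (a job timeout handler would typically do something like
 * this; apart from the two functions named below, the identifiers are
 * illustrative):
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);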
3293 */ 3294 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 3295 { 3296 if (!amdgpu_device_ip_check_soft_reset(adev)) { 3297 DRM_INFO("Timeout, but no hardware hang detected.\n"); 3298 return false; 3299 } 3300 3301 if (amdgpu_gpu_recovery == 0) 3302 goto disabled; 3303 3304 if (amdgpu_sriov_vf(adev)) 3305 return true; 3306 3307 if (amdgpu_gpu_recovery == -1) { 3308 switch (adev->asic_type) { 3309 case CHIP_TOPAZ: 3310 case CHIP_TONGA: 3311 case CHIP_FIJI: 3312 case CHIP_POLARIS10: 3313 case CHIP_POLARIS11: 3314 case CHIP_POLARIS12: 3315 case CHIP_VEGAM: 3316 case CHIP_VEGA20: 3317 case CHIP_VEGA10: 3318 case CHIP_VEGA12: 3319 break; 3320 default: 3321 goto disabled; 3322 } 3323 } 3324 3325 return true; 3326 3327 disabled: 3328 DRM_INFO("GPU recovery disabled.\n"); 3329 return false; 3330 } 3331 3332 /** 3333 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 3334 * 3335 * @adev: amdgpu device pointer 3336 * @job: which job trigger hang 3337 * 3338 * Attempt to reset the GPU if it has hung (all asics). 3339 * Returns 0 for success or an error on failure. 3340 */ 3341 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 3342 struct amdgpu_job *job) 3343 { 3344 int i, r, resched; 3345 3346 dev_info(adev->dev, "GPU reset begin!\n"); 3347 3348 mutex_lock(&adev->lock_reset); 3349 atomic_inc(&adev->gpu_reset_counter); 3350 adev->in_gpu_reset = 1; 3351 3352 /* Block kfd */ 3353 amdgpu_amdkfd_pre_reset(adev); 3354 3355 /* block TTM */ 3356 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 3357 3358 /* block all schedulers and reset given job's ring */ 3359 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3360 struct amdgpu_ring *ring = adev->rings[i]; 3361 3362 if (!ring || !ring->sched.thread) 3363 continue; 3364 3365 kthread_park(ring->sched.thread); 3366 3367 if (job && job->base.sched != &ring->sched) 3368 continue; 3369 3370 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); 3371 3372 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3373 amdgpu_fence_driver_force_completion(ring); 3374 } 3375 3376 if (amdgpu_sriov_vf(adev)) 3377 r = amdgpu_device_reset_sriov(adev, job ? false : true); 3378 else 3379 r = amdgpu_device_reset(adev); 3380 3381 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3382 struct amdgpu_ring *ring = adev->rings[i]; 3383 3384 if (!ring || !ring->sched.thread) 3385 continue; 3386 3387 /* only need recovery sched of the given job's ring 3388 * or all rings (in the case @job is NULL) 3389 * after above amdgpu_reset accomplished 3390 */ 3391 if ((!job || job->base.sched == &ring->sched) && !r) 3392 drm_sched_job_recovery(&ring->sched); 3393 3394 kthread_unpark(ring->sched.thread); 3395 } 3396 3397 if (!amdgpu_device_has_dc_support(adev)) { 3398 drm_helper_resume_force_mode(adev->ddev); 3399 } 3400 3401 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); 3402 3403 if (r) { 3404 /* bad news, how to tell it to userspace ? 
*/ 3405 dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); 3406 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 3407 } else { 3408 dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); 3409 } 3410 3411 /*unlock kfd */ 3412 amdgpu_amdkfd_post_reset(adev); 3413 amdgpu_vf_error_trans_all(adev); 3414 adev->in_gpu_reset = 0; 3415 mutex_unlock(&adev->lock_reset); 3416 return r; 3417 } 3418 3419 /** 3420 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 3421 * 3422 * @adev: amdgpu_device pointer 3423 * 3424 * Fetchs and stores in the driver the PCIE capabilities (gen speed 3425 * and lanes) of the slot the device is in. Handles APUs and 3426 * virtualized environments where PCIE config space may not be available. 3427 */ 3428 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3429 { 3430 struct pci_dev *pdev; 3431 enum pci_bus_speed speed_cap; 3432 enum pcie_link_width link_width; 3433 3434 if (amdgpu_pcie_gen_cap) 3435 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 3436 3437 if (amdgpu_pcie_lane_cap) 3438 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 3439 3440 /* covers APUs as well */ 3441 if (pci_is_root_bus(adev->pdev->bus)) { 3442 if (adev->pm.pcie_gen_mask == 0) 3443 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 3444 if (adev->pm.pcie_mlw_mask == 0) 3445 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 3446 return; 3447 } 3448 3449 if (adev->pm.pcie_gen_mask == 0) { 3450 /* asic caps */ 3451 pdev = adev->pdev; 3452 speed_cap = pcie_get_speed_cap(pdev); 3453 if (speed_cap == PCI_SPEED_UNKNOWN) { 3454 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3455 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3456 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3457 } else { 3458 if (speed_cap == PCIE_SPEED_16_0GT) 3459 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3460 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3461 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 3463 else if (speed_cap == PCIE_SPEED_8_0GT) 3464 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3466 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3467 else if (speed_cap == PCIE_SPEED_5_0GT) 3468 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 3470 else 3471 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 3472 } 3473 /* platform caps */ 3474 pdev = adev->ddev->pdev->bus->self; 3475 speed_cap = pcie_get_speed_cap(pdev); 3476 if (speed_cap == PCI_SPEED_UNKNOWN) { 3477 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3478 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3479 } else { 3480 if (speed_cap == PCIE_SPEED_16_0GT) 3481 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3482 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 3485 else if (speed_cap == PCIE_SPEED_8_0GT) 3486 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3487 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 3489 else if (speed_cap == PCIE_SPEED_5_0GT) 3490 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3492 else 3493 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 3494 3495 } 3496 } 3497 if (adev->pm.pcie_mlw_mask == 0) { 3498 pdev = 
adev->ddev->pdev->bus->self; 3499 link_width = pcie_get_width_cap(pdev); 3500 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { 3501 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 3502 } else { 3503 switch (link_width) { 3504 case PCIE_LNK_X32: 3505 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 3506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3510 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3511 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3512 break; 3513 case PCIE_LNK_X16: 3514 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3520 break; 3521 case PCIE_LNK_X12: 3522 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3527 break; 3528 case PCIE_LNK_X8: 3529 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3532 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3533 break; 3534 case PCIE_LNK_X4: 3535 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3538 break; 3539 case PCIE_LNK_X2: 3540 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3542 break; 3543 case PCIE_LNK_X1: 3544 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 3545 break; 3546 default: 3547 break; 3548 } 3549 } 3550 } 3551 } 3552 3553
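/*
 * Consumption sketch for the masks filled in by amdgpu_device_get_pcie_info()
 * above (illustrative only; the real consumers live in the powerplay/dpm
 * code):
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *		... the link may be trained up to PCIe gen3 ...
 */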