/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to write to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed because a hw reset was performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need the driver to do a vPost, otherwise the gpu hangs,
		 * while smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ?
			true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev))
		adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * The late initialization pass enables clockgating for hardware IPs;
 * the fini and suspend passes disable clockgating for hardware IPs.
 * Returns 0 on success, negative error code on failure.
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of them have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.late_initialized = true;
		}
	}

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	queue_delayed_work(system_wq, &adev->late_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	amdgpu_device_fill_reset_magic(adev);

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_amdkfd_device_fini(adev);

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* need to disable SMC first */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}


	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.sw)
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			amdgpu_free_static_csa(adev);
			amdgpu_device_wb_fini(adev);
			amdgpu_device_vram_scratch_fini(adev);
		}

		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.sw = false;
		adev->ip_blocks[i].status.valid = false;
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_fini)
			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
		adev->ip_blocks[i].status.late_initialized = false;
	}

	if (amdgpu_sriov_vf(adev))
		if (amdgpu_virt_release_full_gpu(adev, false))
DRM_ERROR("failed to release exclusive mode on fini\n"); 1832 1833 return 0; 1834 } 1835 1836 /** 1837 * amdgpu_device_ip_late_init_func_handler - work handler for ib test 1838 * 1839 * @work: work_struct. 1840 */ 1841 static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) 1842 { 1843 struct amdgpu_device *adev = 1844 container_of(work, struct amdgpu_device, late_init_work.work); 1845 int r; 1846 1847 r = amdgpu_ib_ring_tests(adev); 1848 if (r) 1849 DRM_ERROR("ib ring test failed (%d).\n", r); 1850 } 1851 1852 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 1853 { 1854 struct amdgpu_device *adev = 1855 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 1856 1857 mutex_lock(&adev->gfx.gfx_off_mutex); 1858 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 1859 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 1860 adev->gfx.gfx_off_state = true; 1861 } 1862 mutex_unlock(&adev->gfx.gfx_off_mutex); 1863 } 1864 1865 /** 1866 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 1867 * 1868 * @adev: amdgpu_device pointer 1869 * 1870 * Main suspend function for hardware IPs. The list of all the hardware 1871 * IPs that make up the asic is walked, clockgating is disabled and the 1872 * suspend callbacks are run. suspend puts the hardware and software state 1873 * in each IP into a state suitable for suspend. 1874 * Returns 0 on success, negative error code on failure. 1875 */ 1876 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 1877 { 1878 int i, r; 1879 1880 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 1881 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 1882 1883 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1884 if (!adev->ip_blocks[i].status.valid) 1885 continue; 1886 /* displays are handled separately */ 1887 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 1888 /* XXX handle errors */ 1889 r = adev->ip_blocks[i].version->funcs->suspend(adev); 1890 /* XXX handle errors */ 1891 if (r) { 1892 DRM_ERROR("suspend of IP block <%s> failed %d\n", 1893 adev->ip_blocks[i].version->funcs->name, r); 1894 } 1895 } 1896 } 1897 1898 return 0; 1899 } 1900 1901 /** 1902 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 1903 * 1904 * @adev: amdgpu_device pointer 1905 * 1906 * Main suspend function for hardware IPs. The list of all the hardware 1907 * IPs that make up the asic is walked, clockgating is disabled and the 1908 * suspend callbacks are run. suspend puts the hardware and software state 1909 * in each IP into a state suitable for suspend. 1910 * Returns 0 on success, negative error code on failure. 1911 */ 1912 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 1913 { 1914 int i, r; 1915 1916 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1917 if (!adev->ip_blocks[i].status.valid) 1918 continue; 1919 /* displays are handled in phase1 */ 1920 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 1921 continue; 1922 /* XXX handle errors */ 1923 r = adev->ip_blocks[i].version->funcs->suspend(adev); 1924 /* XXX handle errors */ 1925 if (r) { 1926 DRM_ERROR("suspend of IP block <%s> failed %d\n", 1927 adev->ip_blocks[i].version->funcs->name, r); 1928 } 1929 } 1930 1931 return 0; 1932 } 1933 1934 /** 1935 * amdgpu_device_ip_suspend - run suspend for hardware IPs 1936 * 1937 * @adev: amdgpu_device pointer 1938 * 1939 * Main suspend function for hardware IPs. 
The list of all the hardware 1940 * IPs that make up the asic is walked, clockgating is disabled and the 1941 * suspend callbacks are run. suspend puts the hardware and software state 1942 * in each IP into a state suitable for suspend. 1943 * Returns 0 on success, negative error code on failure. 1944 */ 1945 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 1946 { 1947 int r; 1948 1949 if (amdgpu_sriov_vf(adev)) 1950 amdgpu_virt_request_full_gpu(adev, false); 1951 1952 r = amdgpu_device_ip_suspend_phase1(adev); 1953 if (r) 1954 return r; 1955 r = amdgpu_device_ip_suspend_phase2(adev); 1956 1957 if (amdgpu_sriov_vf(adev)) 1958 amdgpu_virt_release_full_gpu(adev, false); 1959 1960 return r; 1961 } 1962 1963 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 1964 { 1965 int i, r; 1966 1967 static enum amd_ip_block_type ip_order[] = { 1968 AMD_IP_BLOCK_TYPE_GMC, 1969 AMD_IP_BLOCK_TYPE_COMMON, 1970 AMD_IP_BLOCK_TYPE_PSP, 1971 AMD_IP_BLOCK_TYPE_IH, 1972 }; 1973 1974 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 1975 int j; 1976 struct amdgpu_ip_block *block; 1977 1978 for (j = 0; j < adev->num_ip_blocks; j++) { 1979 block = &adev->ip_blocks[j]; 1980 1981 if (block->version->type != ip_order[i] || 1982 !block->status.valid) 1983 continue; 1984 1985 r = block->version->funcs->hw_init(adev); 1986 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 1987 if (r) 1988 return r; 1989 } 1990 } 1991 1992 return 0; 1993 } 1994 1995 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 1996 { 1997 int i, r; 1998 1999 static enum amd_ip_block_type ip_order[] = { 2000 AMD_IP_BLOCK_TYPE_SMC, 2001 AMD_IP_BLOCK_TYPE_DCE, 2002 AMD_IP_BLOCK_TYPE_GFX, 2003 AMD_IP_BLOCK_TYPE_SDMA, 2004 AMD_IP_BLOCK_TYPE_UVD, 2005 AMD_IP_BLOCK_TYPE_VCE 2006 }; 2007 2008 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2009 int j; 2010 struct amdgpu_ip_block *block; 2011 2012 for (j = 0; j < adev->num_ip_blocks; j++) { 2013 block = &adev->ip_blocks[j]; 2014 2015 if (block->version->type != ip_order[i] || 2016 !block->status.valid) 2017 continue; 2018 2019 r = block->version->funcs->hw_init(adev); 2020 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2021 if (r) 2022 return r; 2023 } 2024 } 2025 2026 return 0; 2027 } 2028 2029 /** 2030 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2031 * 2032 * @adev: amdgpu_device pointer 2033 * 2034 * First resume function for hardware IPs. The list of all the hardware 2035 * IPs that make up the asic is walked and the resume callbacks are run for 2036 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2037 * after a suspend and updates the software state as necessary. This 2038 * function is also used for restoring the GPU after a GPU reset. 2039 * Returns 0 on success, negative error code on failure. 
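 *
 * Phase 1 only brings up the COMMON, GMC and IH blocks, so that the blocks
 * resumed in amdgpu_device_ip_resume_phase2() have a working memory
 * controller and interrupt handling to rely on.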
 */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				DRM_ERROR("resume of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
 * functional state after a suspend and updates the software state as
 * necessary. This function is also used for restoring the GPU after a GPU
 * reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
			continue;
		r = adev->ip_blocks[i].version->funcs->resume(adev);
		if (r) {
			DRM_ERROR("resume of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs. The hardware IPs
 * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them. In this case (S3/S4) they are
 * run sequentially.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_resume_phase1(adev);
	if (r)
		return r;
	r = amdgpu_device_ip_resume_phase2(adev);

	return r;
}

/**
 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Query the VBIOS data tables to determine if the board supports SR-IOV.
 */
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		if (adev->is_atom_fw) {
			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		} else {
			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		}

		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
	}
}

/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * Returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * LVDS and VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		return amdgpu_dc > 0;
	case CHIP_HAWAII:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
	case CHIP_RAVEN:
#endif
		return amdgpu_dc != 0;
#endif
	default:
		return false;
	}
}

/**
 * amdgpu_device_has_dc_support - check if dc is supported
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true for supported, false for not supported
 */
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return false;

	return amdgpu_device_asic_has_dc_support(adev->asic_type);
}

/**
 * amdgpu_device_init - initialize the driver
 *
 * @adev: amdgpu_device pointer
 * @ddev: drm dev pointer
 * @pdev: pci dev pointer
 * @flags: driver flags
 *
 * Initializes the driver info and hw (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver startup.
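 *
 * Illustrative call sketch (an assumption for illustration, not copied from
 * the driver): the DRM load callback is expected to allocate the device and
 * hand it here, roughly as
 *
 *	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 *	r = amdgpu_device_init(adev, ddev, ddev->pdev, flags);
 *	if (r)
 *		return r;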
2217 */ 2218 int amdgpu_device_init(struct amdgpu_device *adev, 2219 struct drm_device *ddev, 2220 struct pci_dev *pdev, 2221 uint32_t flags) 2222 { 2223 int r, i; 2224 bool runtime = false; 2225 u32 max_MBps; 2226 2227 adev->shutdown = false; 2228 adev->dev = &pdev->dev; 2229 adev->ddev = ddev; 2230 adev->pdev = pdev; 2231 adev->flags = flags; 2232 adev->asic_type = flags & AMD_ASIC_MASK; 2233 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2234 if (amdgpu_emu_mode == 1) 2235 adev->usec_timeout *= 2; 2236 adev->gmc.gart_size = 512 * 1024 * 1024; 2237 adev->accel_working = false; 2238 adev->num_rings = 0; 2239 adev->mman.buffer_funcs = NULL; 2240 adev->mman.buffer_funcs_ring = NULL; 2241 adev->vm_manager.vm_pte_funcs = NULL; 2242 adev->vm_manager.vm_pte_num_rqs = 0; 2243 adev->gmc.gmc_funcs = NULL; 2244 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2245 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2246 2247 adev->smc_rreg = &amdgpu_invalid_rreg; 2248 adev->smc_wreg = &amdgpu_invalid_wreg; 2249 adev->pcie_rreg = &amdgpu_invalid_rreg; 2250 adev->pcie_wreg = &amdgpu_invalid_wreg; 2251 adev->pciep_rreg = &amdgpu_invalid_rreg; 2252 adev->pciep_wreg = &amdgpu_invalid_wreg; 2253 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2254 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2255 adev->didt_rreg = &amdgpu_invalid_rreg; 2256 adev->didt_wreg = &amdgpu_invalid_wreg; 2257 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2258 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2259 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2260 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2261 2262 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2263 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2264 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2265 2266 /* mutex initialization are all done here so we 2267 * can recall function without having locking issues */ 2268 atomic_set(&adev->irq.ih.lock, 0); 2269 mutex_init(&adev->firmware.mutex); 2270 mutex_init(&adev->pm.mutex); 2271 mutex_init(&adev->gfx.gpu_clock_mutex); 2272 mutex_init(&adev->srbm_mutex); 2273 mutex_init(&adev->gfx.pipe_reserve_mutex); 2274 mutex_init(&adev->gfx.gfx_off_mutex); 2275 mutex_init(&adev->grbm_idx_mutex); 2276 mutex_init(&adev->mn_lock); 2277 mutex_init(&adev->virt.vf_errors.lock); 2278 hash_init(adev->mn_hash); 2279 mutex_init(&adev->lock_reset); 2280 2281 amdgpu_device_check_arguments(adev); 2282 2283 spin_lock_init(&adev->mmio_idx_lock); 2284 spin_lock_init(&adev->smc_idx_lock); 2285 spin_lock_init(&adev->pcie_idx_lock); 2286 spin_lock_init(&adev->uvd_ctx_idx_lock); 2287 spin_lock_init(&adev->didt_idx_lock); 2288 spin_lock_init(&adev->gc_cac_idx_lock); 2289 spin_lock_init(&adev->se_cac_idx_lock); 2290 spin_lock_init(&adev->audio_endpt_idx_lock); 2291 spin_lock_init(&adev->mm_stats.lock); 2292 2293 INIT_LIST_HEAD(&adev->shadow_list); 2294 mutex_init(&adev->shadow_list_lock); 2295 2296 INIT_LIST_HEAD(&adev->ring_lru_list); 2297 spin_lock_init(&adev->ring_lru_list_lock); 2298 2299 INIT_DELAYED_WORK(&adev->late_init_work, 2300 amdgpu_device_ip_late_init_func_handler); 2301 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2302 amdgpu_device_delay_enable_gfx_off); 2303 2304 adev->gfx.gfx_off_req_count = 1; 2305 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 2306 2307 /* Registers mapping */ 2308 /* TODO: block userspace mapping of io register */ 2309 if (adev->asic_type >= CHIP_BONAIRE) { 2310 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 2311 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 2312 } else { 2313 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 2314 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 2315 } 2316 2317 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 2318 if (adev->rmmio == NULL) { 2319 return -ENOMEM; 2320 } 2321 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2322 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2323 2324 /* doorbell bar mapping */ 2325 amdgpu_device_doorbell_init(adev); 2326 2327 /* io port mapping */ 2328 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2329 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 2330 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 2331 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 2332 break; 2333 } 2334 } 2335 if (adev->rio_mem == NULL) 2336 DRM_INFO("PCI I/O BAR is not found.\n"); 2337 2338 amdgpu_device_get_pcie_info(adev); 2339 2340 /* early init functions */ 2341 r = amdgpu_device_ip_early_init(adev); 2342 if (r) 2343 return r; 2344 2345 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 2346 /* this will fail for cards that aren't VGA class devices, just 2347 * ignore it */ 2348 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 2349 2350 if (amdgpu_device_is_px(ddev)) 2351 runtime = true; 2352 if (!pci_is_thunderbolt_attached(adev->pdev)) 2353 vga_switcheroo_register_client(adev->pdev, 2354 &amdgpu_switcheroo_ops, runtime); 2355 if (runtime) 2356 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2357 2358 if (amdgpu_emu_mode == 1) { 2359 /* post the asic on emulation mode */ 2360 emu_soc_asic_init(adev); 2361 goto fence_driver_init; 2362 } 2363 2364 /* Read BIOS */ 2365 if (!amdgpu_get_bios(adev)) { 2366 r = -EINVAL; 2367 goto failed; 2368 } 2369 2370 r = amdgpu_atombios_init(adev); 2371 if (r) { 2372 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2373 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2374 goto failed; 2375 } 2376 2377 /* detect if we are with an SRIOV vbios */ 2378 amdgpu_device_detect_sriov_bios(adev); 2379 2380 /* Post card if necessary */ 2381 if (amdgpu_device_need_post(adev)) { 2382 if (!adev->bios) { 2383 dev_err(adev->dev, "no vBIOS found\n"); 2384 r = -EINVAL; 2385 goto failed; 2386 } 2387 DRM_INFO("GPU posting now...\n"); 2388 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2389 if (r) { 2390 dev_err(adev->dev, "gpu post error!\n"); 2391 goto failed; 2392 } 2393 } 2394 2395 if (adev->is_atom_fw) { 2396 /* Initialize clocks */ 2397 r = amdgpu_atomfirmware_get_clock_info(adev); 2398 if (r) { 2399 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2400 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2401 goto failed; 2402 } 2403 } else { 2404 /* Initialize clocks */ 2405 r = amdgpu_atombios_get_clock_info(adev); 2406 if (r) { 2407 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2408 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2409 goto failed; 2410 } 2411 /* init i2c buses */ 2412 if (!amdgpu_device_has_dc_support(adev)) 2413 amdgpu_atombios_i2c_init(adev); 2414 } 2415 2416 fence_driver_init: 2417 /* Fence driver */ 2418 r = 
amdgpu_fence_driver_init(adev); 2419 if (r) { 2420 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2421 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2422 goto failed; 2423 } 2424 2425 /* init the mode config */ 2426 drm_mode_config_init(adev->ddev); 2427 2428 r = amdgpu_device_ip_init(adev); 2429 if (r) { 2430 /* failed in exclusive mode due to timeout */ 2431 if (amdgpu_sriov_vf(adev) && 2432 !amdgpu_sriov_runtime(adev) && 2433 amdgpu_virt_mmio_blocked(adev) && 2434 !amdgpu_virt_wait_reset(adev)) { 2435 dev_err(adev->dev, "VF exclusive mode timeout\n"); 2436 /* Don't send request since VF is inactive. */ 2437 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 2438 adev->virt.ops = NULL; 2439 r = -EAGAIN; 2440 goto failed; 2441 } 2442 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2443 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2444 goto failed; 2445 } 2446 2447 adev->accel_working = true; 2448 2449 amdgpu_vm_check_compute_bug(adev); 2450 2451 /* Initialize the buffer migration limit. */ 2452 if (amdgpu_moverate >= 0) 2453 max_MBps = amdgpu_moverate; 2454 else 2455 max_MBps = 8; /* Allow 8 MB/s. */ 2456 /* Get a log2 for easy divisions. */ 2457 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 2458 2459 r = amdgpu_ib_pool_init(adev); 2460 if (r) { 2461 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2462 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2463 goto failed; 2464 } 2465 2466 if (amdgpu_sriov_vf(adev)) 2467 amdgpu_virt_init_data_exchange(adev); 2468 2469 amdgpu_fbdev_init(adev); 2470 2471 r = amdgpu_pm_sysfs_init(adev); 2472 if (r) 2473 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2474 2475 r = amdgpu_debugfs_gem_init(adev); 2476 if (r) 2477 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2478 2479 r = amdgpu_debugfs_regs_init(adev); 2480 if (r) 2481 DRM_ERROR("registering register debugfs failed (%d).\n", r); 2482 2483 r = amdgpu_debugfs_firmware_init(adev); 2484 if (r) 2485 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2486 2487 r = amdgpu_debugfs_init(adev); 2488 if (r) 2489 DRM_ERROR("Creating debugfs files failed (%d).\n", r); 2490 2491 if ((amdgpu_testing & 1)) { 2492 if (adev->accel_working) 2493 amdgpu_test_moves(adev); 2494 else 2495 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 2496 } 2497 if (amdgpu_benchmarking) { 2498 if (adev->accel_working) 2499 amdgpu_benchmark(adev, amdgpu_benchmarking); 2500 else 2501 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 2502 } 2503 2504 /* enable clockgating, etc. after ib tests, etc. since some blocks require 2505 * explicit gating rather than handling it automatically. 2506 */ 2507 r = amdgpu_device_ip_late_init(adev); 2508 if (r) { 2509 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 2510 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2511 goto failed; 2512 } 2513 2514 return 0; 2515 2516 failed: 2517 amdgpu_vf_error_trans_all(adev); 2518 if (runtime) 2519 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2520 2521 return r; 2522 } 2523 2524 /** 2525 * amdgpu_device_fini - tear down the driver 2526 * 2527 * @adev: amdgpu_device pointer 2528 * 2529 * Tear down the driver info (all asics). 2530 * Called at driver shutdown. 
2531 */ 2532 void amdgpu_device_fini(struct amdgpu_device *adev) 2533 { 2534 int r; 2535 2536 DRM_INFO("amdgpu: finishing device.\n"); 2537 adev->shutdown = true; 2538 /* disable all interrupts */ 2539 amdgpu_irq_disable_all(adev); 2540 if (adev->mode_info.mode_config_initialized){ 2541 if (!amdgpu_device_has_dc_support(adev)) 2542 drm_crtc_force_disable_all(adev->ddev); 2543 else 2544 drm_atomic_helper_shutdown(adev->ddev); 2545 } 2546 amdgpu_ib_pool_fini(adev); 2547 amdgpu_fence_driver_fini(adev); 2548 amdgpu_pm_sysfs_fini(adev); 2549 amdgpu_fbdev_fini(adev); 2550 r = amdgpu_device_ip_fini(adev); 2551 if (adev->firmware.gpu_info_fw) { 2552 release_firmware(adev->firmware.gpu_info_fw); 2553 adev->firmware.gpu_info_fw = NULL; 2554 } 2555 adev->accel_working = false; 2556 cancel_delayed_work_sync(&adev->late_init_work); 2557 /* free i2c buses */ 2558 if (!amdgpu_device_has_dc_support(adev)) 2559 amdgpu_i2c_fini(adev); 2560 2561 if (amdgpu_emu_mode != 1) 2562 amdgpu_atombios_fini(adev); 2563 2564 kfree(adev->bios); 2565 adev->bios = NULL; 2566 if (!pci_is_thunderbolt_attached(adev->pdev)) 2567 vga_switcheroo_unregister_client(adev->pdev); 2568 if (adev->flags & AMD_IS_PX) 2569 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2570 vga_client_register(adev->pdev, NULL, NULL, NULL); 2571 if (adev->rio_mem) 2572 pci_iounmap(adev->pdev, adev->rio_mem); 2573 adev->rio_mem = NULL; 2574 iounmap(adev->rmmio); 2575 adev->rmmio = NULL; 2576 amdgpu_device_doorbell_fini(adev); 2577 amdgpu_debugfs_regs_cleanup(adev); 2578 } 2579 2580 2581 /* 2582 * Suspend & resume. 2583 */ 2584 /** 2585 * amdgpu_device_suspend - initiate device suspend 2586 * 2587 * @dev: drm dev pointer 2588 * @suspend: suspend state 2589 * @fbcon : notify the fbdev of suspend 2590 * 2591 * Puts the hw in the suspend state (all asics). 2592 * Returns 0 for success or an error on failure. 2593 * Called at driver suspend. 
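 *
 * Illustrative sketch (an assumption, not copied from the driver): a PM
 * suspend callback would typically invoke this along the lines of
 *
 *	static int amdgpu_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true, true);
 *	}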
2594 */ 2595 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) 2596 { 2597 struct amdgpu_device *adev; 2598 struct drm_crtc *crtc; 2599 struct drm_connector *connector; 2600 int r; 2601 2602 if (dev == NULL || dev->dev_private == NULL) { 2603 return -ENODEV; 2604 } 2605 2606 adev = dev->dev_private; 2607 2608 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2609 return 0; 2610 2611 drm_kms_helper_poll_disable(dev); 2612 2613 if (fbcon) 2614 amdgpu_fbdev_set_suspend(adev, 1); 2615 2616 cancel_delayed_work_sync(&adev->late_init_work); 2617 2618 if (!amdgpu_device_has_dc_support(adev)) { 2619 /* turn off display hw */ 2620 drm_modeset_lock_all(dev); 2621 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2622 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 2623 } 2624 drm_modeset_unlock_all(dev); 2625 /* unpin the front buffers and cursors */ 2626 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2627 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2628 struct drm_framebuffer *fb = crtc->primary->fb; 2629 struct amdgpu_bo *robj; 2630 2631 if (amdgpu_crtc->cursor_bo) { 2632 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2633 r = amdgpu_bo_reserve(aobj, true); 2634 if (r == 0) { 2635 amdgpu_bo_unpin(aobj); 2636 amdgpu_bo_unreserve(aobj); 2637 } 2638 } 2639 2640 if (fb == NULL || fb->obj[0] == NULL) { 2641 continue; 2642 } 2643 robj = gem_to_amdgpu_bo(fb->obj[0]); 2644 /* don't unpin kernel fb objects */ 2645 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2646 r = amdgpu_bo_reserve(robj, true); 2647 if (r == 0) { 2648 amdgpu_bo_unpin(robj); 2649 amdgpu_bo_unreserve(robj); 2650 } 2651 } 2652 } 2653 } 2654 2655 amdgpu_amdkfd_suspend(adev); 2656 2657 r = amdgpu_device_ip_suspend_phase1(adev); 2658 2659 /* evict vram memory */ 2660 amdgpu_bo_evict_vram(adev); 2661 2662 amdgpu_fence_driver_suspend(adev); 2663 2664 r = amdgpu_device_ip_suspend_phase2(adev); 2665 2666 /* evict remaining vram memory 2667 * This second call to evict vram is to evict the gart page table 2668 * using the CPU. 2669 */ 2670 amdgpu_bo_evict_vram(adev); 2671 2672 pci_save_state(dev->pdev); 2673 if (suspend) { 2674 /* Shut down the device */ 2675 pci_disable_device(dev->pdev); 2676 pci_set_power_state(dev->pdev, PCI_D3hot); 2677 } else { 2678 r = amdgpu_asic_reset(adev); 2679 if (r) 2680 DRM_ERROR("amdgpu asic reset failed\n"); 2681 } 2682 2683 return 0; 2684 } 2685 2686 /** 2687 * amdgpu_device_resume - initiate device resume 2688 * 2689 * @dev: drm dev pointer 2690 * @resume: resume state 2691 * @fbcon : notify the fbdev of resume 2692 * 2693 * Bring the hw back to operating state (all asics). 2694 * Returns 0 for success or an error on failure. 2695 * Called at driver resume. 
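 *
 * Illustrative sketch (an assumption): the corresponding PM resume callback
 * is expected to call this as
 *
 *	return amdgpu_device_resume(drm_dev, true, true);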
2696 */ 2697 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) 2698 { 2699 struct drm_connector *connector; 2700 struct amdgpu_device *adev = dev->dev_private; 2701 struct drm_crtc *crtc; 2702 int r = 0; 2703 2704 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2705 return 0; 2706 2707 if (resume) { 2708 pci_set_power_state(dev->pdev, PCI_D0); 2709 pci_restore_state(dev->pdev); 2710 r = pci_enable_device(dev->pdev); 2711 if (r) 2712 return r; 2713 } 2714 2715 /* post card */ 2716 if (amdgpu_device_need_post(adev)) { 2717 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2718 if (r) 2719 DRM_ERROR("amdgpu asic init failed\n"); 2720 } 2721 2722 r = amdgpu_device_ip_resume(adev); 2723 if (r) { 2724 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 2725 return r; 2726 } 2727 amdgpu_fence_driver_resume(adev); 2728 2729 2730 r = amdgpu_device_ip_late_init(adev); 2731 if (r) 2732 return r; 2733 2734 if (!amdgpu_device_has_dc_support(adev)) { 2735 /* pin cursors */ 2736 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2737 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2738 2739 if (amdgpu_crtc->cursor_bo) { 2740 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2741 r = amdgpu_bo_reserve(aobj, true); 2742 if (r == 0) { 2743 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2744 if (r != 0) 2745 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 2746 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 2747 amdgpu_bo_unreserve(aobj); 2748 } 2749 } 2750 } 2751 } 2752 r = amdgpu_amdkfd_resume(adev); 2753 if (r) 2754 return r; 2755 2756 /* Make sure IB tests flushed */ 2757 flush_delayed_work(&adev->late_init_work); 2758 2759 /* blat the mode back in */ 2760 if (fbcon) { 2761 if (!amdgpu_device_has_dc_support(adev)) { 2762 /* pre DCE11 */ 2763 drm_helper_resume_force_mode(dev); 2764 2765 /* turn on display hw */ 2766 drm_modeset_lock_all(dev); 2767 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2768 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 2769 } 2770 drm_modeset_unlock_all(dev); 2771 } 2772 amdgpu_fbdev_set_suspend(adev, 0); 2773 } 2774 2775 drm_kms_helper_poll_enable(dev); 2776 2777 /* 2778 * Most of the connector probing functions try to acquire runtime pm 2779 * refs to ensure that the GPU is powered on when connector polling is 2780 * performed. Since we're calling this from a runtime PM callback, 2781 * trying to acquire rpm refs will cause us to deadlock. 2782 * 2783 * Since we're guaranteed to be holding the rpm lock, it's safe to 2784 * temporarily disable the rpm helpers so this doesn't deadlock us. 2785 */ 2786 #ifdef CONFIG_PM 2787 dev->dev->power.disable_depth++; 2788 #endif 2789 if (!amdgpu_device_has_dc_support(adev)) 2790 drm_helper_hpd_irq_event(dev); 2791 else 2792 drm_kms_helper_hotplug_event(dev); 2793 #ifdef CONFIG_PM 2794 dev->dev->power.disable_depth--; 2795 #endif 2796 return 0; 2797 } 2798 2799 /** 2800 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 2801 * 2802 * @adev: amdgpu_device pointer 2803 * 2804 * The list of all the hardware IPs that make up the asic is walked and 2805 * the check_soft_reset callbacks are run. check_soft_reset determines 2806 * if the asic is still hung or not. 2807 * Returns true if any of the IPs are still in a hung state, false if not. 
2808 */ 2809 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 2810 { 2811 int i; 2812 bool asic_hang = false; 2813 2814 if (amdgpu_sriov_vf(adev)) 2815 return true; 2816 2817 if (amdgpu_asic_need_full_reset(adev)) 2818 return true; 2819 2820 for (i = 0; i < adev->num_ip_blocks; i++) { 2821 if (!adev->ip_blocks[i].status.valid) 2822 continue; 2823 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 2824 adev->ip_blocks[i].status.hang = 2825 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 2826 if (adev->ip_blocks[i].status.hang) { 2827 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 2828 asic_hang = true; 2829 } 2830 } 2831 return asic_hang; 2832 } 2833 2834 /** 2835 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 2836 * 2837 * @adev: amdgpu_device pointer 2838 * 2839 * The list of all the hardware IPs that make up the asic is walked and the 2840 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 2841 * handles any IP specific hardware or software state changes that are 2842 * necessary for a soft reset to succeed. 2843 * Returns 0 on success, negative error code on failure. 2844 */ 2845 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 2846 { 2847 int i, r = 0; 2848 2849 for (i = 0; i < adev->num_ip_blocks; i++) { 2850 if (!adev->ip_blocks[i].status.valid) 2851 continue; 2852 if (adev->ip_blocks[i].status.hang && 2853 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 2854 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 2855 if (r) 2856 return r; 2857 } 2858 } 2859 2860 return 0; 2861 } 2862 2863 /** 2864 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 2865 * 2866 * @adev: amdgpu_device pointer 2867 * 2868 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 2869 * reset is necessary to recover. 2870 * Returns true if a full asic reset is required, false if not. 2871 */ 2872 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 2873 { 2874 int i; 2875 2876 if (amdgpu_asic_need_full_reset(adev)) 2877 return true; 2878 2879 for (i = 0; i < adev->num_ip_blocks; i++) { 2880 if (!adev->ip_blocks[i].status.valid) 2881 continue; 2882 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 2883 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 2884 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 2885 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 2886 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2887 if (adev->ip_blocks[i].status.hang) { 2888 DRM_INFO("Some block need full reset!\n"); 2889 return true; 2890 } 2891 } 2892 } 2893 return false; 2894 } 2895 2896 /** 2897 * amdgpu_device_ip_soft_reset - do a soft reset 2898 * 2899 * @adev: amdgpu_device pointer 2900 * 2901 * The list of all the hardware IPs that make up the asic is walked and the 2902 * soft_reset callbacks are run if the block is hung. soft_reset handles any 2903 * IP specific hardware or software state changes that are necessary to soft 2904 * reset the IP. 2905 * Returns 0 on success, negative error code on failure. 
2906 */ 2907 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 2908 { 2909 int i, r = 0; 2910 2911 for (i = 0; i < adev->num_ip_blocks; i++) { 2912 if (!adev->ip_blocks[i].status.valid) 2913 continue; 2914 if (adev->ip_blocks[i].status.hang && 2915 adev->ip_blocks[i].version->funcs->soft_reset) { 2916 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 2917 if (r) 2918 return r; 2919 } 2920 } 2921 2922 return 0; 2923 } 2924 2925 /** 2926 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 2927 * 2928 * @adev: amdgpu_device pointer 2929 * 2930 * The list of all the hardware IPs that make up the asic is walked and the 2931 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 2932 * handles any IP specific hardware or software state changes that are 2933 * necessary after the IP has been soft reset. 2934 * Returns 0 on success, negative error code on failure. 2935 */ 2936 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 2937 { 2938 int i, r = 0; 2939 2940 for (i = 0; i < adev->num_ip_blocks; i++) { 2941 if (!adev->ip_blocks[i].status.valid) 2942 continue; 2943 if (adev->ip_blocks[i].status.hang && 2944 adev->ip_blocks[i].version->funcs->post_soft_reset) 2945 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 2946 if (r) 2947 return r; 2948 } 2949 2950 return 0; 2951 } 2952 2953 /** 2954 * amdgpu_device_recover_vram - Recover some VRAM contents 2955 * 2956 * @adev: amdgpu_device pointer 2957 * 2958 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 2959 * restore things like GPUVM page tables after a GPU reset where 2960 * the contents of VRAM might be lost. 2961 * 2962 * Returns: 2963 * 0 on success, negative error code on failure. 2964 */ 2965 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 2966 { 2967 struct dma_fence *fence = NULL, *next = NULL; 2968 struct amdgpu_bo *shadow; 2969 long r = 1, tmo; 2970 2971 if (amdgpu_sriov_runtime(adev)) 2972 tmo = msecs_to_jiffies(8000); 2973 else 2974 tmo = msecs_to_jiffies(100); 2975 2976 DRM_INFO("recover vram bo from shadow start\n"); 2977 mutex_lock(&adev->shadow_list_lock); 2978 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 2979 2980 /* No need to recover an evicted BO */ 2981 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 2982 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 2983 continue; 2984 2985 r = amdgpu_bo_restore_shadow(shadow, &next); 2986 if (r) 2987 break; 2988 2989 if (fence) { 2990 r = dma_fence_wait_timeout(fence, false, tmo); 2991 dma_fence_put(fence); 2992 fence = next; 2993 if (r <= 0) 2994 break; 2995 } else { 2996 fence = next; 2997 } 2998 } 2999 mutex_unlock(&adev->shadow_list_lock); 3000 3001 if (fence) 3002 tmo = dma_fence_wait_timeout(fence, false, tmo); 3003 dma_fence_put(fence); 3004 3005 if (r <= 0 || tmo <= 0) { 3006 DRM_ERROR("recover vram bo from shadow failed\n"); 3007 return -EIO; 3008 } 3009 3010 DRM_INFO("recover vram bo from shadow done\n"); 3011 return 0; 3012 } 3013 3014 /** 3015 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough 3016 * 3017 * @adev: amdgpu device pointer 3018 * 3019 * attempt to do soft-reset or full-reset and reinitialize Asic 3020 * return 0 means succeeded otherwise failed 3021 */ 3022 static int amdgpu_device_reset(struct amdgpu_device *adev) 3023 { 3024 bool need_full_reset, vram_lost = 0; 3025 int r; 3026 3027 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3028 3029 if (!need_full_reset) { 3030 
		amdgpu_device_ip_pre_soft_reset(adev);
		r = amdgpu_device_ip_soft_reset(adev);
		amdgpu_device_ip_post_soft_reset(adev);
		if (r || amdgpu_device_ip_check_soft_reset(adev)) {
			DRM_INFO("soft reset failed, will fallback to full reset!\n");
			need_full_reset = true;
		}
	}

	if (need_full_reset) {
		r = amdgpu_device_ip_suspend(adev);

retry:
		r = amdgpu_asic_reset(adev);
		/* post card */
		amdgpu_atom_asic_init(adev->mode_info.atom_context);

		if (!r) {
			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
			r = amdgpu_device_ip_resume_phase1(adev);
			if (r)
				goto out;

			vram_lost = amdgpu_device_check_vram_lost(adev);
			if (vram_lost) {
				DRM_ERROR("VRAM is lost!\n");
				atomic_inc(&adev->vram_lost_counter);
			}

			r = amdgpu_gtt_mgr_recover(
				&adev->mman.bdev.man[TTM_PL_TT]);
			if (r)
				goto out;

			r = amdgpu_device_ip_resume_phase2(adev);
			if (r)
				goto out;

			if (vram_lost)
				amdgpu_device_fill_reset_magic(adev);
		}
	}

out:
	if (!r) {
		amdgpu_irq_gpu_reset_resume_helper(adev);
		r = amdgpu_ib_ring_tests(adev);
		if (r) {
			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
			r = amdgpu_device_ip_suspend(adev);
			need_full_reset = true;
			goto retry;
		}
	}

	if (!r)
		r = amdgpu_device_recover_vram(adev);

	return r;
}

/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu device pointer
 * @from_hypervisor: request from hypervisor
 *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     bool from_hypervisor)
{
	int r;

	if (from_hypervisor)
		r = amdgpu_virt_request_full_gpu(adev, true);
	else
		r = amdgpu_virt_reset_gpu(adev);
	if (r)
		return r;

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		goto error;

	/* we need to recover the GART before resuming SMC/CP/SDMA */
	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		goto error;

	amdgpu_irq_gpu_reset_resume_helper(adev);
	r = amdgpu_ib_ring_tests(adev);

error:
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		atomic_inc(&adev->vram_lost_counter);
		r = amdgpu_device_recover_vram(adev);
	}

	return r;
}

/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
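 *
 * Illustrative sketch (an assumption, not copied from the driver): a ring
 * timeout handler would typically gate recovery on this helper, e.g.
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);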
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
					 !amdgpu_sriov_vf(adev))) {
		DRM_INFO("GPU recovery disabled.\n");
		return false;
	}

	return true;
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Returns 0 for success or an error on failure.
 */
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	int i, r, resched;

	dev_info(adev->dev, "GPU reset begin!\n");

	mutex_lock(&adev->lock_reset);
	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;

	/* Block kfd */
	amdgpu_amdkfd_pre_reset(adev);

	/* block TTM */
	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		kthread_park(ring->sched.thread);

		if (job && job->base.sched == &ring->sched)
			continue;

		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if (amdgpu_sriov_vf(adev))
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
	else
		r = amdgpu_device_reset(adev);

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* only need to recover the scheduler of the given job's ring,
		 * or all rings (when @job is NULL), after the reset above
		 * has completed
		 */
		if ((!job || job->base.sched == &ring->sched) && !r)
			drm_sched_job_recovery(&ring->sched);

		kthread_unpark(ring->sched.thread);
	}

	if (!amdgpu_device_has_dc_support(adev)) {
		drm_helper_resume_force_mode(adev->ddev);
	}

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

	if (r) {
		/* bad news, how to tell it to userspace ? */
		dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
	} else {
		dev_info(adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
	}

	/* unlock kfd */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_vf_error_trans_all(adev);
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIe config space may not be available.
3257 */ 3258 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3259 { 3260 struct pci_dev *pdev; 3261 enum pci_bus_speed speed_cap; 3262 enum pcie_link_width link_width; 3263 3264 if (amdgpu_pcie_gen_cap) 3265 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 3266 3267 if (amdgpu_pcie_lane_cap) 3268 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 3269 3270 /* covers APUs as well */ 3271 if (pci_is_root_bus(adev->pdev->bus)) { 3272 if (adev->pm.pcie_gen_mask == 0) 3273 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 3274 if (adev->pm.pcie_mlw_mask == 0) 3275 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 3276 return; 3277 } 3278 3279 if (adev->pm.pcie_gen_mask == 0) { 3280 /* asic caps */ 3281 pdev = adev->pdev; 3282 speed_cap = pcie_get_speed_cap(pdev); 3283 if (speed_cap == PCI_SPEED_UNKNOWN) { 3284 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3285 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3286 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3287 } else { 3288 if (speed_cap == PCIE_SPEED_16_0GT) 3289 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3290 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3291 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3292 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 3293 else if (speed_cap == PCIE_SPEED_8_0GT) 3294 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3295 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3296 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3297 else if (speed_cap == PCIE_SPEED_5_0GT) 3298 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3299 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 3300 else 3301 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 3302 } 3303 /* platform caps */ 3304 pdev = adev->ddev->pdev->bus->self; 3305 speed_cap = pcie_get_speed_cap(pdev); 3306 if (speed_cap == PCI_SPEED_UNKNOWN) { 3307 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3308 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3309 } else { 3310 if (speed_cap == PCIE_SPEED_16_0GT) 3311 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3312 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3313 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3314 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 3315 else if (speed_cap == PCIE_SPEED_8_0GT) 3316 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3317 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3318 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 3319 else if (speed_cap == PCIE_SPEED_5_0GT) 3320 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3321 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3322 else 3323 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 3324 3325 } 3326 } 3327 if (adev->pm.pcie_mlw_mask == 0) { 3328 pdev = adev->ddev->pdev->bus->self; 3329 link_width = pcie_get_width_cap(pdev); 3330 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { 3331 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 3332 } else { 3333 switch (link_width) { 3334 case PCIE_LNK_X32: 3335 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 3336 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3337 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3338 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3339 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3340 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3341 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3342 break; 3343 case PCIE_LNK_X16: 3344 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3345 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3346 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3347 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3348 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3349 
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3350 break; 3351 case PCIE_LNK_X12: 3352 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3353 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3354 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3355 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3356 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3357 break; 3358 case PCIE_LNK_X8: 3359 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3360 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3361 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3362 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3363 break; 3364 case PCIE_LNK_X4: 3365 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3366 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3367 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3368 break; 3369 case PCIE_LNK_X2: 3370 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3371 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3372 break; 3373 case PCIE_LNK_X1: 3374 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 3375 break; 3376 default: 3377 break; 3378 } 3379 } 3380 } 3381 } 3382 3383
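/*
 * Illustrative sketch (an assumption, not copied from the driver): consumers
 * of the masks filled in by amdgpu_device_get_pcie_info() can test whether
 * both the ASIC and the platform support a given link speed, e.g.
 *
 *	if ((adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
 *	    (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3))
 *		... request the faster link speed ...
 */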