/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);
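
/*
 * Minimal usage sketch, assuming the GPU is exposed as card0:
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 */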

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
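 *
 * Like amdgpu_mm_rreg(), offsets that fall outside the mapped MMIO window
 * go through the indexed mmMM_INDEX/mmMM_DATA pair, serialized by
 * mmio_idx_lock.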
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page.
	 * So with paging queue enabled,
	 * the max num_doorbells should be increased by one page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible.
 * We try very hard not to fail, but if any of the BARs is not accessible
 * after the resize we abort driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
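	/*
	 * The amdgpu_smu_memory_pool_size module parameter is given in 256MB
	 * units (hence the << 28 below); sizes 1/2 require roughly 3GB of
	 * system memory, sizes 4/8 roughly 7GB.
	 */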
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	int ret = 0;

	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	ret = amdgpu_device_get_job_timeout_settings(adev);
	if (ret) {
		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
		return ret;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return ret;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver.  Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
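 *
 * Illustrative call (enum names from amd_shared.h), e.g. to gate UVD:
 *   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *                                          AMD_PG_STATE_GATE);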
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
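 *
 * A typical (illustrative) feature check:
 *   if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX))
 *           ... the asic has a GFX block ...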
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
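 *
 * The string, as parsed below, is a semicolon separated list of
 * <pci address>,<number of crtcs> entries ("all" matches every device),
 * e.g. the hypothetical "0000:26:00.0,4;all,1".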
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;

		/* query the reg access mode at the very beginning */
		amdgpu_virt_init_reg_access_mode(adev);
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

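/*
 * Hardware init is split into two phases: phase 1 below brings up only the
 * blocks everything else depends on (COMMON, IH, and PSP when running as an
 * SR-IOV VF); the remaining blocks are brought up in
 * amdgpu_device_ip_hw_init_phase2() once firmware loading has been started in
 * between (see amdgpu_device_ip_init()).
 */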
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}
	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run.  sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
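 *
 * hw_init itself is staged: the GMC block is brought up first so GPU memory
 * can be allocated, then amdgpu_device_ip_hw_init_phase1(), firmware loading
 * via amdgpu_device_fw_loading(), and finally
 * amdgpu_device_ip_hw_init_phase2().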
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}
		}
	}

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

init_failed:
	if (amdgpu_sriov_vf(adev)) {
		if (!r)
			amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);
	}

	return r;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
 * this function before a GPU reset.  If the value is retained after a
 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * Late initialization pass enabling clockgating for hardware IPs.
 * Fini or suspend, pass disabling clockgating for hardware IPs.
 * Returns 0 on success, negative error code on failure.
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
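		/* gate in block order, ungate in reverse order */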
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled &&
		    adev->powerplay.pp_funcs &&
		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	amdgpu_device_fill_reset_magic(adev);

	r = amdgpu_device_enable_mgpu_fan_boost();
	if (r)
		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);

	/* set to low pstate by default */
	amdgpu_xgmi_set_pstate(adev, 0);

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run.
hw_fini tears down the hardware associated with each IP 1952 * and sw_fini tears down any software state associated with each IP. 1953 * Returns 0 on success, negative error code on failure. 1954 */ 1955 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 1956 { 1957 int i, r; 1958 1959 amdgpu_ras_pre_fini(adev); 1960 1961 if (adev->gmc.xgmi.num_physical_nodes > 1) 1962 amdgpu_xgmi_remove_device(adev); 1963 1964 amdgpu_amdkfd_device_fini(adev); 1965 1966 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 1967 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 1968 1969 /* need to disable SMC first */ 1970 for (i = 0; i < adev->num_ip_blocks; i++) { 1971 if (!adev->ip_blocks[i].status.hw) 1972 continue; 1973 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 1974 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 1975 /* XXX handle errors */ 1976 if (r) { 1977 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 1978 adev->ip_blocks[i].version->funcs->name, r); 1979 } 1980 adev->ip_blocks[i].status.hw = false; 1981 break; 1982 } 1983 } 1984 1985 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1986 if (!adev->ip_blocks[i].status.hw) 1987 continue; 1988 1989 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 1990 /* XXX handle errors */ 1991 if (r) { 1992 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 1993 adev->ip_blocks[i].version->funcs->name, r); 1994 } 1995 1996 adev->ip_blocks[i].status.hw = false; 1997 } 1998 1999 2000 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2001 if (!adev->ip_blocks[i].status.sw) 2002 continue; 2003 2004 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2005 amdgpu_ucode_free_bo(adev); 2006 amdgpu_free_static_csa(&adev->virt.csa_obj); 2007 amdgpu_device_wb_fini(adev); 2008 amdgpu_device_vram_scratch_fini(adev); 2009 amdgpu_ib_pool_fini(adev); 2010 } 2011 2012 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2013 /* XXX handle errors */ 2014 if (r) { 2015 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2016 adev->ip_blocks[i].version->funcs->name, r); 2017 } 2018 adev->ip_blocks[i].status.sw = false; 2019 adev->ip_blocks[i].status.valid = false; 2020 } 2021 2022 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2023 if (!adev->ip_blocks[i].status.late_initialized) 2024 continue; 2025 if (adev->ip_blocks[i].version->funcs->late_fini) 2026 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2027 adev->ip_blocks[i].status.late_initialized = false; 2028 } 2029 2030 amdgpu_ras_fini(adev); 2031 2032 if (amdgpu_sriov_vf(adev)) 2033 if (amdgpu_virt_release_full_gpu(adev, false)) 2034 DRM_ERROR("failed to release exclusive mode on fini\n"); 2035 2036 return 0; 2037 } 2038 2039 /** 2040 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2041 * 2042 * @work: work_struct. 
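 *
 * A minimal sketch of how this handler is wired up, mirroring the calls
 * made later in amdgpu_device_init():
 *
 *   INIT_DELAYED_WORK(&adev->delayed_init_work,
 *                     amdgpu_device_delayed_init_work_handler);
 *   queue_delayed_work(system_wq, &adev->delayed_init_work,
 *                      msecs_to_jiffies(AMDGPU_RESUME_MS));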
2043 */ 2044 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2045 { 2046 struct amdgpu_device *adev = 2047 container_of(work, struct amdgpu_device, delayed_init_work.work); 2048 int r; 2049 2050 r = amdgpu_ib_ring_tests(adev); 2051 if (r) 2052 DRM_ERROR("ib ring test failed (%d).\n", r); 2053 } 2054 2055 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2056 { 2057 struct amdgpu_device *adev = 2058 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2059 2060 mutex_lock(&adev->gfx.gfx_off_mutex); 2061 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 2062 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2063 adev->gfx.gfx_off_state = true; 2064 } 2065 mutex_unlock(&adev->gfx.gfx_off_mutex); 2066 } 2067 2068 /** 2069 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2070 * 2071 * @adev: amdgpu_device pointer 2072 * 2073 * Main suspend function for hardware IPs. The list of all the hardware 2074 * IPs that make up the asic is walked, clockgating is disabled and the 2075 * suspend callbacks are run. suspend puts the hardware and software state 2076 * in each IP into a state suitable for suspend. 2077 * Returns 0 on success, negative error code on failure. 2078 */ 2079 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2080 { 2081 int i, r; 2082 2083 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2084 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2085 2086 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2087 if (!adev->ip_blocks[i].status.valid) 2088 continue; 2089 /* displays are handled separately */ 2090 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2091 /* XXX handle errors */ 2092 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2093 /* XXX handle errors */ 2094 if (r) { 2095 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2096 adev->ip_blocks[i].version->funcs->name, r); 2097 } 2098 } 2099 } 2100 2101 return 0; 2102 } 2103 2104 /** 2105 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2106 * 2107 * @adev: amdgpu_device pointer 2108 * 2109 * Main suspend function for hardware IPs. The list of all the hardware 2110 * IPs that make up the asic is walked, clockgating is disabled and the 2111 * suspend callbacks are run. suspend puts the hardware and software state 2112 * in each IP into a state suitable for suspend. 2113 * Returns 0 on success, negative error code on failure. 2114 */ 2115 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2116 { 2117 int i, r; 2118 2119 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2120 if (!adev->ip_blocks[i].status.valid) 2121 continue; 2122 /* displays are handled in phase1 */ 2123 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2124 continue; 2125 /* XXX handle errors */ 2126 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2127 /* XXX handle errors */ 2128 if (r) { 2129 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2130 adev->ip_blocks[i].version->funcs->name, r); 2131 } 2132 } 2133 2134 return 0; 2135 } 2136 2137 /** 2138 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2139 * 2140 * @adev: amdgpu_device pointer 2141 * 2142 * Main suspend function for hardware IPs. The list of all the hardware 2143 * IPs that make up the asic is walked, clockgating is disabled and the 2144 * suspend callbacks are run. 
suspend puts the hardware and software state 2145 * in each IP into a state suitable for suspend. 2146 * Returns 0 on success, negative error code on failure. 2147 */ 2148 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2149 { 2150 int r; 2151 2152 if (amdgpu_sriov_vf(adev)) 2153 amdgpu_virt_request_full_gpu(adev, false); 2154 2155 r = amdgpu_device_ip_suspend_phase1(adev); 2156 if (r) 2157 return r; 2158 r = amdgpu_device_ip_suspend_phase2(adev); 2159 2160 if (amdgpu_sriov_vf(adev)) 2161 amdgpu_virt_release_full_gpu(adev, false); 2162 2163 return r; 2164 } 2165 2166 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2167 { 2168 int i, r; 2169 2170 static enum amd_ip_block_type ip_order[] = { 2171 AMD_IP_BLOCK_TYPE_GMC, 2172 AMD_IP_BLOCK_TYPE_COMMON, 2173 AMD_IP_BLOCK_TYPE_PSP, 2174 AMD_IP_BLOCK_TYPE_IH, 2175 }; 2176 2177 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2178 int j; 2179 struct amdgpu_ip_block *block; 2180 2181 for (j = 0; j < adev->num_ip_blocks; j++) { 2182 block = &adev->ip_blocks[j]; 2183 2184 if (block->version->type != ip_order[i] || 2185 !block->status.valid) 2186 continue; 2187 2188 r = block->version->funcs->hw_init(adev); 2189 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2190 if (r) 2191 return r; 2192 } 2193 } 2194 2195 return 0; 2196 } 2197 2198 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2199 { 2200 int i, r; 2201 2202 static enum amd_ip_block_type ip_order[] = { 2203 AMD_IP_BLOCK_TYPE_SMC, 2204 AMD_IP_BLOCK_TYPE_DCE, 2205 AMD_IP_BLOCK_TYPE_GFX, 2206 AMD_IP_BLOCK_TYPE_SDMA, 2207 AMD_IP_BLOCK_TYPE_UVD, 2208 AMD_IP_BLOCK_TYPE_VCE 2209 }; 2210 2211 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2212 int j; 2213 struct amdgpu_ip_block *block; 2214 2215 for (j = 0; j < adev->num_ip_blocks; j++) { 2216 block = &adev->ip_blocks[j]; 2217 2218 if (block->version->type != ip_order[i] || 2219 !block->status.valid) 2220 continue; 2221 2222 r = block->version->funcs->hw_init(adev); 2223 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2224 if (r) 2225 return r; 2226 } 2227 } 2228 2229 return 0; 2230 } 2231 2232 /** 2233 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2234 * 2235 * @adev: amdgpu_device pointer 2236 * 2237 * First resume function for hardware IPs. The list of all the hardware 2238 * IPs that make up the asic is walked and the resume callbacks are run for 2239 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2240 * after a suspend and updates the software state as necessary. This 2241 * function is also used for restoring the GPU after a GPU reset. 2242 * Returns 0 on success, negative error code on failure. 
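 *
 * Phase 1 only brings back COMMON, GMC and IH; callers are expected to
 * load firmware and then run phase 2, as amdgpu_device_ip_resume() below
 * does (sketch):
 *
 *   r = amdgpu_device_ip_resume_phase1(adev);
 *   if (r)
 *           return r;
 *   r = amdgpu_device_fw_loading(adev);
 *   if (r)
 *           return r;
 *   r = amdgpu_device_ip_resume_phase2(adev);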
2243 */ 2244 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 2245 { 2246 int i, r; 2247 2248 for (i = 0; i < adev->num_ip_blocks; i++) { 2249 if (!adev->ip_blocks[i].status.valid) 2250 continue; 2251 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2252 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2253 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2254 r = adev->ip_blocks[i].version->funcs->resume(adev); 2255 if (r) { 2256 DRM_ERROR("resume of IP block <%s> failed %d\n", 2257 adev->ip_blocks[i].version->funcs->name, r); 2258 return r; 2259 } 2260 } 2261 } 2262 2263 return 0; 2264 } 2265 2266 /** 2267 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 2268 * 2269 * @adev: amdgpu_device pointer 2270 * 2271 * Second resume function for hardware IPs. The list of all the hardware 2272 * IPs that make up the asic is walked and the resume callbacks are run for 2273 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 2274 * functional state after a suspend and updates the software state as 2275 * necessary. This function is also used for restoring the GPU after a GPU 2276 * reset. 2277 * Returns 0 on success, negative error code on failure. 2278 */ 2279 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2280 { 2281 int i, r; 2282 2283 for (i = 0; i < adev->num_ip_blocks; i++) { 2284 if (!adev->ip_blocks[i].status.valid) 2285 continue; 2286 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2287 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2288 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 2289 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 2290 continue; 2291 r = adev->ip_blocks[i].version->funcs->resume(adev); 2292 if (r) { 2293 DRM_ERROR("resume of IP block <%s> failed %d\n", 2294 adev->ip_blocks[i].version->funcs->name, r); 2295 return r; 2296 } 2297 } 2298 2299 return 0; 2300 } 2301 2302 /** 2303 * amdgpu_device_ip_resume - run resume for hardware IPs 2304 * 2305 * @adev: amdgpu_device pointer 2306 * 2307 * Main resume function for hardware IPs. The hardware IPs 2308 * are split into two resume functions because they are 2309 * also used in recovering from a GPU reset and some additional 2310 * steps need to be taken between them. In this case (S3/S4) they are 2311 * run sequentially. 2312 * Returns 0 on success, negative error code on failure. 2313 */ 2314 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2315 { 2316 int r; 2317 2318 r = amdgpu_device_ip_resume_phase1(adev); 2319 if (r) 2320 return r; 2321 2322 r = amdgpu_device_fw_loading(adev); 2323 if (r) 2324 return r; 2325 2326 r = amdgpu_device_ip_resume_phase2(adev); 2327 2328 return r; 2329 } 2330 2331 /** 2332 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2333 * 2334 * @adev: amdgpu_device pointer 2335 * 2336 * Query the VBIOS data tables to determine if the board supports SR-IOV.
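 *
 * It is called once early in amdgpu_device_init(), before the decision on
 * whether to reset or post the card (sketch of that call site):
 *
 *   amdgpu_device_detect_sriov_bios(adev);
 *   if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev))
 *           r = amdgpu_asic_reset(adev);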
2337 */ 2338 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2339 { 2340 if (amdgpu_sriov_vf(adev)) { 2341 if (adev->is_atom_fw) { 2342 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2343 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2344 } else { 2345 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2346 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2347 } 2348 2349 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2350 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2351 } 2352 } 2353 2354 /** 2355 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2356 * 2357 * @asic_type: AMD asic type 2358 * 2359 * Check if there is DC (new modesetting infrastructre) support for an asic. 2360 * returns true if DC has support, false if not. 2361 */ 2362 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2363 { 2364 switch (asic_type) { 2365 #if defined(CONFIG_DRM_AMD_DC) 2366 case CHIP_BONAIRE: 2367 case CHIP_KAVERI: 2368 case CHIP_KABINI: 2369 case CHIP_MULLINS: 2370 /* 2371 * We have systems in the wild with these ASICs that require 2372 * LVDS and VGA support which is not supported with DC. 2373 * 2374 * Fallback to the non-DC driver here by default so as not to 2375 * cause regressions. 2376 */ 2377 return amdgpu_dc > 0; 2378 case CHIP_HAWAII: 2379 case CHIP_CARRIZO: 2380 case CHIP_STONEY: 2381 case CHIP_POLARIS10: 2382 case CHIP_POLARIS11: 2383 case CHIP_POLARIS12: 2384 case CHIP_VEGAM: 2385 case CHIP_TONGA: 2386 case CHIP_FIJI: 2387 case CHIP_VEGA10: 2388 case CHIP_VEGA12: 2389 case CHIP_VEGA20: 2390 #if defined(CONFIG_DRM_AMD_DC_DCN1_0) 2391 case CHIP_RAVEN: 2392 #endif 2393 return amdgpu_dc != 0; 2394 #endif 2395 default: 2396 return false; 2397 } 2398 } 2399 2400 /** 2401 * amdgpu_device_has_dc_support - check if dc is supported 2402 * 2403 * @adev: amdgpu_device_pointer 2404 * 2405 * Returns true for supported, false for not supported 2406 */ 2407 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2408 { 2409 if (amdgpu_sriov_vf(adev)) 2410 return false; 2411 2412 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2413 } 2414 2415 2416 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 2417 { 2418 struct amdgpu_device *adev = 2419 container_of(__work, struct amdgpu_device, xgmi_reset_work); 2420 2421 adev->asic_reset_res = amdgpu_asic_reset(adev); 2422 if (adev->asic_reset_res) 2423 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 2424 adev->asic_reset_res, adev->ddev->unique); 2425 } 2426 2427 2428 /** 2429 * amdgpu_device_init - initialize the driver 2430 * 2431 * @adev: amdgpu_device pointer 2432 * @ddev: drm dev pointer 2433 * @pdev: pci dev pointer 2434 * @flags: driver flags 2435 * 2436 * Initializes the driver info and hw (all asics). 2437 * Returns 0 for success or an error on failure. 2438 * Called at driver startup. 
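 *
 * A hedged sketch of the expected call site (the real caller lives in the
 * KMS load path outside this file; the surrounding allocation is
 * illustrative only):
 *
 *   adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 *   if (!adev)
 *           return -ENOMEM;
 *   ddev->dev_private = (void *)adev;
 *   r = amdgpu_device_init(adev, ddev, ddev->pdev, flags);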
2439 */ 2440 int amdgpu_device_init(struct amdgpu_device *adev, 2441 struct drm_device *ddev, 2442 struct pci_dev *pdev, 2443 uint32_t flags) 2444 { 2445 int r, i; 2446 bool runtime = false; 2447 u32 max_MBps; 2448 2449 adev->shutdown = false; 2450 adev->dev = &pdev->dev; 2451 adev->ddev = ddev; 2452 adev->pdev = pdev; 2453 adev->flags = flags; 2454 adev->asic_type = flags & AMD_ASIC_MASK; 2455 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2456 if (amdgpu_emu_mode == 1) 2457 adev->usec_timeout *= 2; 2458 adev->gmc.gart_size = 512 * 1024 * 1024; 2459 adev->accel_working = false; 2460 adev->num_rings = 0; 2461 adev->mman.buffer_funcs = NULL; 2462 adev->mman.buffer_funcs_ring = NULL; 2463 adev->vm_manager.vm_pte_funcs = NULL; 2464 adev->vm_manager.vm_pte_num_rqs = 0; 2465 adev->gmc.gmc_funcs = NULL; 2466 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2467 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2468 2469 adev->smc_rreg = &amdgpu_invalid_rreg; 2470 adev->smc_wreg = &amdgpu_invalid_wreg; 2471 adev->pcie_rreg = &amdgpu_invalid_rreg; 2472 adev->pcie_wreg = &amdgpu_invalid_wreg; 2473 adev->pciep_rreg = &amdgpu_invalid_rreg; 2474 adev->pciep_wreg = &amdgpu_invalid_wreg; 2475 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2476 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2477 adev->didt_rreg = &amdgpu_invalid_rreg; 2478 adev->didt_wreg = &amdgpu_invalid_wreg; 2479 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2480 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2481 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2482 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2483 2484 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2485 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2486 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2487 2488 /* mutex initialization are all done here so we 2489 * can recall function without having locking issues */ 2490 atomic_set(&adev->irq.ih.lock, 0); 2491 mutex_init(&adev->firmware.mutex); 2492 mutex_init(&adev->pm.mutex); 2493 mutex_init(&adev->gfx.gpu_clock_mutex); 2494 mutex_init(&adev->srbm_mutex); 2495 mutex_init(&adev->gfx.pipe_reserve_mutex); 2496 mutex_init(&adev->gfx.gfx_off_mutex); 2497 mutex_init(&adev->grbm_idx_mutex); 2498 mutex_init(&adev->mn_lock); 2499 mutex_init(&adev->virt.vf_errors.lock); 2500 hash_init(adev->mn_hash); 2501 mutex_init(&adev->lock_reset); 2502 mutex_init(&adev->virt.dpm_mutex); 2503 2504 r = amdgpu_device_check_arguments(adev); 2505 if (r) 2506 return r; 2507 2508 spin_lock_init(&adev->mmio_idx_lock); 2509 spin_lock_init(&adev->smc_idx_lock); 2510 spin_lock_init(&adev->pcie_idx_lock); 2511 spin_lock_init(&adev->uvd_ctx_idx_lock); 2512 spin_lock_init(&adev->didt_idx_lock); 2513 spin_lock_init(&adev->gc_cac_idx_lock); 2514 spin_lock_init(&adev->se_cac_idx_lock); 2515 spin_lock_init(&adev->audio_endpt_idx_lock); 2516 spin_lock_init(&adev->mm_stats.lock); 2517 2518 INIT_LIST_HEAD(&adev->shadow_list); 2519 mutex_init(&adev->shadow_list_lock); 2520 2521 INIT_LIST_HEAD(&adev->ring_lru_list); 2522 spin_lock_init(&adev->ring_lru_list_lock); 2523 2524 INIT_DELAYED_WORK(&adev->delayed_init_work, 2525 amdgpu_device_delayed_init_work_handler); 2526 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2527 amdgpu_device_delay_enable_gfx_off); 2528 2529 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 2530 2531 adev->gfx.gfx_off_req_count = 1; 2532 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 2533 2534 /* Registers mapping */ 2535 /* TODO: block userspace mapping of io register */ 2536 if (adev->asic_type >= CHIP_BONAIRE) { 2537 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 2538 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 2539 } else { 2540 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 2541 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 2542 } 2543 2544 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 2545 if (adev->rmmio == NULL) { 2546 return -ENOMEM; 2547 } 2548 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2549 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2550 2551 /* io port mapping */ 2552 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2553 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 2554 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 2555 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 2556 break; 2557 } 2558 } 2559 if (adev->rio_mem == NULL) 2560 DRM_INFO("PCI I/O BAR is not found.\n"); 2561 2562 amdgpu_device_get_pcie_info(adev); 2563 2564 /* early init functions */ 2565 r = amdgpu_device_ip_early_init(adev); 2566 if (r) 2567 return r; 2568 2569 /* doorbell bar mapping and doorbell index init*/ 2570 amdgpu_device_doorbell_init(adev); 2571 2572 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 2573 /* this will fail for cards that aren't VGA class devices, just 2574 * ignore it */ 2575 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 2576 2577 if (amdgpu_device_is_px(ddev)) 2578 runtime = true; 2579 if (!pci_is_thunderbolt_attached(adev->pdev)) 2580 vga_switcheroo_register_client(adev->pdev, 2581 &amdgpu_switcheroo_ops, runtime); 2582 if (runtime) 2583 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2584 2585 if (amdgpu_emu_mode == 1) { 2586 /* post the asic on emulation mode */ 2587 emu_soc_asic_init(adev); 2588 goto fence_driver_init; 2589 } 2590 2591 /* detect if we are with an SRIOV vbios */ 2592 amdgpu_device_detect_sriov_bios(adev); 2593 2594 /* check if we need to reset the asic 2595 * E.g., driver was not cleanly unloaded previously, etc. 
2596 */ 2597 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 2598 r = amdgpu_asic_reset(adev); 2599 if (r) { 2600 dev_err(adev->dev, "asic reset on init failed\n"); 2601 goto failed; 2602 } 2603 } 2604 2605 /* Post card if necessary */ 2606 if (amdgpu_device_need_post(adev)) { 2607 if (!adev->bios) { 2608 dev_err(adev->dev, "no vBIOS found\n"); 2609 r = -EINVAL; 2610 goto failed; 2611 } 2612 DRM_INFO("GPU posting now...\n"); 2613 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2614 if (r) { 2615 dev_err(adev->dev, "gpu post error!\n"); 2616 goto failed; 2617 } 2618 } 2619 2620 if (adev->is_atom_fw) { 2621 /* Initialize clocks */ 2622 r = amdgpu_atomfirmware_get_clock_info(adev); 2623 if (r) { 2624 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2625 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2626 goto failed; 2627 } 2628 } else { 2629 /* Initialize clocks */ 2630 r = amdgpu_atombios_get_clock_info(adev); 2631 if (r) { 2632 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2633 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2634 goto failed; 2635 } 2636 /* init i2c buses */ 2637 if (!amdgpu_device_has_dc_support(adev)) 2638 amdgpu_atombios_i2c_init(adev); 2639 } 2640 2641 fence_driver_init: 2642 /* Fence driver */ 2643 r = amdgpu_fence_driver_init(adev); 2644 if (r) { 2645 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2646 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2647 goto failed; 2648 } 2649 2650 /* init the mode config */ 2651 drm_mode_config_init(adev->ddev); 2652 2653 r = amdgpu_device_ip_init(adev); 2654 if (r) { 2655 /* failed in exclusive mode due to timeout */ 2656 if (amdgpu_sriov_vf(adev) && 2657 !amdgpu_sriov_runtime(adev) && 2658 amdgpu_virt_mmio_blocked(adev) && 2659 !amdgpu_virt_wait_reset(adev)) { 2660 dev_err(adev->dev, "VF exclusive mode timeout\n"); 2661 /* Don't send request since VF is inactive. */ 2662 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 2663 adev->virt.ops = NULL; 2664 r = -EAGAIN; 2665 goto failed; 2666 } 2667 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2668 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2669 if (amdgpu_virt_request_full_gpu(adev, false)) 2670 amdgpu_virt_release_full_gpu(adev, false); 2671 goto failed; 2672 } 2673 2674 adev->accel_working = true; 2675 2676 amdgpu_vm_check_compute_bug(adev); 2677 2678 /* Initialize the buffer migration limit. */ 2679 if (amdgpu_moverate >= 0) 2680 max_MBps = amdgpu_moverate; 2681 else 2682 max_MBps = 8; /* Allow 8 MB/s. */ 2683 /* Get a log2 for easy divisions. 
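   Keeping the log2 lets later throttling code convert between a time
   budget and a byte budget with shifts rather than divides; since 1 MB/s
   is roughly 1 byte/us, the conversions built on this field look about
   like this (sketch; the real helpers live in the CS throttling code):

       bytes = (u64)us << adev->mm_stats.log2_max_MBps;
       us = bytes >> adev->mm_stats.log2_max_MBps;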
*/ 2684 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 2685 2686 amdgpu_fbdev_init(adev); 2687 2688 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) 2689 amdgpu_pm_virt_sysfs_init(adev); 2690 2691 r = amdgpu_pm_sysfs_init(adev); 2692 if (r) 2693 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2694 2695 r = amdgpu_ucode_sysfs_init(adev); 2696 if (r) 2697 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 2698 2699 r = amdgpu_debugfs_gem_init(adev); 2700 if (r) 2701 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2702 2703 r = amdgpu_debugfs_regs_init(adev); 2704 if (r) 2705 DRM_ERROR("registering register debugfs failed (%d).\n", r); 2706 2707 r = amdgpu_debugfs_firmware_init(adev); 2708 if (r) 2709 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2710 2711 r = amdgpu_debugfs_init(adev); 2712 if (r) 2713 DRM_ERROR("Creating debugfs files failed (%d).\n", r); 2714 2715 if ((amdgpu_testing & 1)) { 2716 if (adev->accel_working) 2717 amdgpu_test_moves(adev); 2718 else 2719 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 2720 } 2721 if (amdgpu_benchmarking) { 2722 if (adev->accel_working) 2723 amdgpu_benchmark(adev, amdgpu_benchmarking); 2724 else 2725 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 2726 } 2727 2728 /* enable clockgating, etc. after ib tests, etc. since some blocks require 2729 * explicit gating rather than handling it automatically. 2730 */ 2731 r = amdgpu_device_ip_late_init(adev); 2732 if (r) { 2733 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 2734 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2735 goto failed; 2736 } 2737 2738 /* must succeed. */ 2739 amdgpu_ras_resume(adev); 2740 2741 queue_delayed_work(system_wq, &adev->delayed_init_work, 2742 msecs_to_jiffies(AMDGPU_RESUME_MS)); 2743 2744 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); 2745 if (r) { 2746 dev_err(adev->dev, "Could not create pcie_replay_count"); 2747 return r; 2748 } 2749 2750 r = amdgpu_pmu_init(adev); 2751 if (r) 2752 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 2753 2754 return 0; 2755 2756 failed: 2757 amdgpu_vf_error_trans_all(adev); 2758 if (runtime) 2759 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2760 2761 return r; 2762 } 2763 2764 /** 2765 * amdgpu_device_fini - tear down the driver 2766 * 2767 * @adev: amdgpu_device pointer 2768 * 2769 * Tear down the driver info (all asics). 2770 * Called at driver shutdown. 
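 *
 * Sketch of the expected pairing with amdgpu_device_init() in the unload
 * path (the exact caller is outside this file; the shape is illustrative):
 *
 *   amdgpu_device_fini(adev);
 *   kfree(adev);
 *   ddev->dev_private = NULL;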
2771 */ 2772 void amdgpu_device_fini(struct amdgpu_device *adev) 2773 { 2774 int r; 2775 2776 DRM_INFO("amdgpu: finishing device.\n"); 2777 adev->shutdown = true; 2778 /* disable all interrupts */ 2779 amdgpu_irq_disable_all(adev); 2780 if (adev->mode_info.mode_config_initialized){ 2781 if (!amdgpu_device_has_dc_support(adev)) 2782 drm_helper_force_disable_all(adev->ddev); 2783 else 2784 drm_atomic_helper_shutdown(adev->ddev); 2785 } 2786 amdgpu_fence_driver_fini(adev); 2787 amdgpu_pm_sysfs_fini(adev); 2788 amdgpu_fbdev_fini(adev); 2789 r = amdgpu_device_ip_fini(adev); 2790 if (adev->firmware.gpu_info_fw) { 2791 release_firmware(adev->firmware.gpu_info_fw); 2792 adev->firmware.gpu_info_fw = NULL; 2793 } 2794 adev->accel_working = false; 2795 cancel_delayed_work_sync(&adev->delayed_init_work); 2796 /* free i2c buses */ 2797 if (!amdgpu_device_has_dc_support(adev)) 2798 amdgpu_i2c_fini(adev); 2799 2800 if (amdgpu_emu_mode != 1) 2801 amdgpu_atombios_fini(adev); 2802 2803 kfree(adev->bios); 2804 adev->bios = NULL; 2805 if (!pci_is_thunderbolt_attached(adev->pdev)) 2806 vga_switcheroo_unregister_client(adev->pdev); 2807 if (adev->flags & AMD_IS_PX) 2808 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2809 vga_client_register(adev->pdev, NULL, NULL, NULL); 2810 if (adev->rio_mem) 2811 pci_iounmap(adev->pdev, adev->rio_mem); 2812 adev->rio_mem = NULL; 2813 iounmap(adev->rmmio); 2814 adev->rmmio = NULL; 2815 amdgpu_device_doorbell_fini(adev); 2816 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) 2817 amdgpu_pm_virt_sysfs_fini(adev); 2818 2819 amdgpu_debugfs_regs_cleanup(adev); 2820 device_remove_file(adev->dev, &dev_attr_pcie_replay_count); 2821 amdgpu_ucode_sysfs_fini(adev); 2822 amdgpu_pmu_fini(adev); 2823 } 2824 2825 2826 /* 2827 * Suspend & resume. 2828 */ 2829 /** 2830 * amdgpu_device_suspend - initiate device suspend 2831 * 2832 * @dev: drm dev pointer 2833 * @suspend: suspend state 2834 * @fbcon : notify the fbdev of suspend 2835 * 2836 * Puts the hw in the suspend state (all asics). 2837 * Returns 0 for success or an error on failure. 2838 * Called at driver suspend. 
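 *
 * Typical use from the PM entry points (a sketch; the thin wrappers live
 * outside this file): a full system suspend passes suspend=true so the
 * PCI device is also put into D3hot, e.g.
 *
 *   return amdgpu_device_suspend(drm_dev, true, true);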
2839 */ 2840 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) 2841 { 2842 struct amdgpu_device *adev; 2843 struct drm_crtc *crtc; 2844 struct drm_connector *connector; 2845 int r; 2846 2847 if (dev == NULL || dev->dev_private == NULL) { 2848 return -ENODEV; 2849 } 2850 2851 adev = dev->dev_private; 2852 2853 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2854 return 0; 2855 2856 adev->in_suspend = true; 2857 drm_kms_helper_poll_disable(dev); 2858 2859 if (fbcon) 2860 amdgpu_fbdev_set_suspend(adev, 1); 2861 2862 cancel_delayed_work_sync(&adev->delayed_init_work); 2863 2864 if (!amdgpu_device_has_dc_support(adev)) { 2865 /* turn off display hw */ 2866 drm_modeset_lock_all(dev); 2867 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2868 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 2869 } 2870 drm_modeset_unlock_all(dev); 2871 /* unpin the front buffers and cursors */ 2872 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2873 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2874 struct drm_framebuffer *fb = crtc->primary->fb; 2875 struct amdgpu_bo *robj; 2876 2877 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 2878 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2879 r = amdgpu_bo_reserve(aobj, true); 2880 if (r == 0) { 2881 amdgpu_bo_unpin(aobj); 2882 amdgpu_bo_unreserve(aobj); 2883 } 2884 } 2885 2886 if (fb == NULL || fb->obj[0] == NULL) { 2887 continue; 2888 } 2889 robj = gem_to_amdgpu_bo(fb->obj[0]); 2890 /* don't unpin kernel fb objects */ 2891 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2892 r = amdgpu_bo_reserve(robj, true); 2893 if (r == 0) { 2894 amdgpu_bo_unpin(robj); 2895 amdgpu_bo_unreserve(robj); 2896 } 2897 } 2898 } 2899 } 2900 2901 amdgpu_amdkfd_suspend(adev); 2902 2903 amdgpu_ras_suspend(adev); 2904 2905 r = amdgpu_device_ip_suspend_phase1(adev); 2906 2907 /* evict vram memory */ 2908 amdgpu_bo_evict_vram(adev); 2909 2910 amdgpu_fence_driver_suspend(adev); 2911 2912 r = amdgpu_device_ip_suspend_phase2(adev); 2913 2914 /* evict remaining vram memory 2915 * This second call to evict vram is to evict the gart page table 2916 * using the CPU. 2917 */ 2918 amdgpu_bo_evict_vram(adev); 2919 2920 pci_save_state(dev->pdev); 2921 if (suspend) { 2922 /* Shut down the device */ 2923 pci_disable_device(dev->pdev); 2924 pci_set_power_state(dev->pdev, PCI_D3hot); 2925 } else { 2926 r = amdgpu_asic_reset(adev); 2927 if (r) 2928 DRM_ERROR("amdgpu asic reset failed\n"); 2929 } 2930 2931 return 0; 2932 } 2933 2934 /** 2935 * amdgpu_device_resume - initiate device resume 2936 * 2937 * @dev: drm dev pointer 2938 * @resume: resume state 2939 * @fbcon : notify the fbdev of resume 2940 * 2941 * Bring the hw back to operating state (all asics). 2942 * Returns 0 for success or an error on failure. 2943 * Called at driver resume. 
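 *
 * Counterpart sketch for the resume path: resume=true re-enables the PCI
 * device and restores its config space before any registers are touched,
 * e.g.
 *
 *   return amdgpu_device_resume(drm_dev, true, true);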
2944 */ 2945 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) 2946 { 2947 struct drm_connector *connector; 2948 struct amdgpu_device *adev = dev->dev_private; 2949 struct drm_crtc *crtc; 2950 int r = 0; 2951 2952 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2953 return 0; 2954 2955 if (resume) { 2956 pci_set_power_state(dev->pdev, PCI_D0); 2957 pci_restore_state(dev->pdev); 2958 r = pci_enable_device(dev->pdev); 2959 if (r) 2960 return r; 2961 } 2962 2963 /* post card */ 2964 if (amdgpu_device_need_post(adev)) { 2965 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2966 if (r) 2967 DRM_ERROR("amdgpu asic init failed\n"); 2968 } 2969 2970 r = amdgpu_device_ip_resume(adev); 2971 if (r) { 2972 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 2973 return r; 2974 } 2975 amdgpu_fence_driver_resume(adev); 2976 2977 2978 r = amdgpu_device_ip_late_init(adev); 2979 if (r) 2980 return r; 2981 2982 queue_delayed_work(system_wq, &adev->delayed_init_work, 2983 msecs_to_jiffies(AMDGPU_RESUME_MS)); 2984 2985 if (!amdgpu_device_has_dc_support(adev)) { 2986 /* pin cursors */ 2987 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2988 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2989 2990 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 2991 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2992 r = amdgpu_bo_reserve(aobj, true); 2993 if (r == 0) { 2994 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2995 if (r != 0) 2996 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 2997 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 2998 amdgpu_bo_unreserve(aobj); 2999 } 3000 } 3001 } 3002 } 3003 r = amdgpu_amdkfd_resume(adev); 3004 if (r) 3005 return r; 3006 3007 /* Make sure IB tests flushed */ 3008 flush_delayed_work(&adev->delayed_init_work); 3009 3010 /* blat the mode back in */ 3011 if (fbcon) { 3012 if (!amdgpu_device_has_dc_support(adev)) { 3013 /* pre DCE11 */ 3014 drm_helper_resume_force_mode(dev); 3015 3016 /* turn on display hw */ 3017 drm_modeset_lock_all(dev); 3018 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 3019 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 3020 } 3021 drm_modeset_unlock_all(dev); 3022 } 3023 amdgpu_fbdev_set_suspend(adev, 0); 3024 } 3025 3026 drm_kms_helper_poll_enable(dev); 3027 3028 amdgpu_ras_resume(adev); 3029 3030 /* 3031 * Most of the connector probing functions try to acquire runtime pm 3032 * refs to ensure that the GPU is powered on when connector polling is 3033 * performed. Since we're calling this from a runtime PM callback, 3034 * trying to acquire rpm refs will cause us to deadlock. 3035 * 3036 * Since we're guaranteed to be holding the rpm lock, it's safe to 3037 * temporarily disable the rpm helpers so this doesn't deadlock us. 3038 */ 3039 #ifdef CONFIG_PM 3040 dev->dev->power.disable_depth++; 3041 #endif 3042 if (!amdgpu_device_has_dc_support(adev)) 3043 drm_helper_hpd_irq_event(dev); 3044 else 3045 drm_kms_helper_hotplug_event(dev); 3046 #ifdef CONFIG_PM 3047 dev->dev->power.disable_depth--; 3048 #endif 3049 adev->in_suspend = false; 3050 3051 return 0; 3052 } 3053 3054 /** 3055 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 3056 * 3057 * @adev: amdgpu_device pointer 3058 * 3059 * The list of all the hardware IPs that make up the asic is walked and 3060 * the check_soft_reset callbacks are run. check_soft_reset determines 3061 * if the asic is still hung or not. 
3062 * Returns true if any of the IPs are still in a hung state, false if not. 3063 */ 3064 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 3065 { 3066 int i; 3067 bool asic_hang = false; 3068 3069 if (amdgpu_sriov_vf(adev)) 3070 return true; 3071 3072 if (amdgpu_asic_need_full_reset(adev)) 3073 return true; 3074 3075 for (i = 0; i < adev->num_ip_blocks; i++) { 3076 if (!adev->ip_blocks[i].status.valid) 3077 continue; 3078 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 3079 adev->ip_blocks[i].status.hang = 3080 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 3081 if (adev->ip_blocks[i].status.hang) { 3082 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 3083 asic_hang = true; 3084 } 3085 } 3086 return asic_hang; 3087 } 3088 3089 /** 3090 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 3091 * 3092 * @adev: amdgpu_device pointer 3093 * 3094 * The list of all the hardware IPs that make up the asic is walked and the 3095 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 3096 * handles any IP specific hardware or software state changes that are 3097 * necessary for a soft reset to succeed. 3098 * Returns 0 on success, negative error code on failure. 3099 */ 3100 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 3101 { 3102 int i, r = 0; 3103 3104 for (i = 0; i < adev->num_ip_blocks; i++) { 3105 if (!adev->ip_blocks[i].status.valid) 3106 continue; 3107 if (adev->ip_blocks[i].status.hang && 3108 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 3109 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 3110 if (r) 3111 return r; 3112 } 3113 } 3114 3115 return 0; 3116 } 3117 3118 /** 3119 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3120 * 3121 * @adev: amdgpu_device pointer 3122 * 3123 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3124 * reset is necessary to recover. 3125 * Returns true if a full asic reset is required, false if not. 3126 */ 3127 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3128 { 3129 int i; 3130 3131 if (amdgpu_asic_need_full_reset(adev)) 3132 return true; 3133 3134 for (i = 0; i < adev->num_ip_blocks; i++) { 3135 if (!adev->ip_blocks[i].status.valid) 3136 continue; 3137 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3138 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3139 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3140 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3141 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3142 if (adev->ip_blocks[i].status.hang) { 3143 DRM_INFO("Some block need full reset!\n"); 3144 return true; 3145 } 3146 } 3147 } 3148 return false; 3149 } 3150 3151 /** 3152 * amdgpu_device_ip_soft_reset - do a soft reset 3153 * 3154 * @adev: amdgpu_device pointer 3155 * 3156 * The list of all the hardware IPs that make up the asic is walked and the 3157 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3158 * IP specific hardware or software state changes that are necessary to soft 3159 * reset the IP. 3160 * Returns 0 on success, negative error code on failure. 
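 *
 * The soft-reset hooks are driven as a sequence by the recovery path,
 * roughly as amdgpu_device_pre_asic_reset() does further below (sketch):
 *
 *   amdgpu_device_ip_pre_soft_reset(adev);
 *   r = amdgpu_device_ip_soft_reset(adev);
 *   amdgpu_device_ip_post_soft_reset(adev);
 *   if (r || amdgpu_device_ip_check_soft_reset(adev))
 *           need_full_reset = true;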
3161 */ 3162 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3163 { 3164 int i, r = 0; 3165 3166 for (i = 0; i < adev->num_ip_blocks; i++) { 3167 if (!adev->ip_blocks[i].status.valid) 3168 continue; 3169 if (adev->ip_blocks[i].status.hang && 3170 adev->ip_blocks[i].version->funcs->soft_reset) { 3171 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3172 if (r) 3173 return r; 3174 } 3175 } 3176 3177 return 0; 3178 } 3179 3180 /** 3181 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3182 * 3183 * @adev: amdgpu_device pointer 3184 * 3185 * The list of all the hardware IPs that make up the asic is walked and the 3186 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3187 * handles any IP specific hardware or software state changes that are 3188 * necessary after the IP has been soft reset. 3189 * Returns 0 on success, negative error code on failure. 3190 */ 3191 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3192 { 3193 int i, r = 0; 3194 3195 for (i = 0; i < adev->num_ip_blocks; i++) { 3196 if (!adev->ip_blocks[i].status.valid) 3197 continue; 3198 if (adev->ip_blocks[i].status.hang && 3199 adev->ip_blocks[i].version->funcs->post_soft_reset) 3200 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3201 if (r) 3202 return r; 3203 } 3204 3205 return 0; 3206 } 3207 3208 /** 3209 * amdgpu_device_recover_vram - Recover some VRAM contents 3210 * 3211 * @adev: amdgpu_device pointer 3212 * 3213 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3214 * restore things like GPUVM page tables after a GPU reset where 3215 * the contents of VRAM might be lost. 3216 * 3217 * Returns: 3218 * 0 on success, negative error code on failure. 3219 */ 3220 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3221 { 3222 struct dma_fence *fence = NULL, *next = NULL; 3223 struct amdgpu_bo *shadow; 3224 long r = 1, tmo; 3225 3226 if (amdgpu_sriov_runtime(adev)) 3227 tmo = msecs_to_jiffies(8000); 3228 else 3229 tmo = msecs_to_jiffies(100); 3230 3231 DRM_INFO("recover vram bo from shadow start\n"); 3232 mutex_lock(&adev->shadow_list_lock); 3233 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3234 3235 /* No need to recover an evicted BO */ 3236 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3237 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || 3238 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3239 continue; 3240 3241 r = amdgpu_bo_restore_shadow(shadow, &next); 3242 if (r) 3243 break; 3244 3245 if (fence) { 3246 tmo = dma_fence_wait_timeout(fence, false, tmo); 3247 dma_fence_put(fence); 3248 fence = next; 3249 if (tmo == 0) { 3250 r = -ETIMEDOUT; 3251 break; 3252 } else if (tmo < 0) { 3253 r = tmo; 3254 break; 3255 } 3256 } else { 3257 fence = next; 3258 } 3259 } 3260 mutex_unlock(&adev->shadow_list_lock); 3261 3262 if (fence) 3263 tmo = dma_fence_wait_timeout(fence, false, tmo); 3264 dma_fence_put(fence); 3265 3266 if (r < 0 || tmo <= 0) { 3267 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 3268 return -EIO; 3269 } 3270 3271 DRM_INFO("recover vram bo from shadow done\n"); 3272 return 0; 3273 } 3274 3275 3276 /** 3277 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3278 * 3279 * @adev: amdgpu device pointer 3280 * @from_hypervisor: request from hypervisor 3281 * 3282 * do VF FLR and reinitialize Asic 3283 * return 0 means succeeded otherwise failed 3284 */ 3285 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 
3286 bool from_hypervisor) 3287 { 3288 int r; 3289 3290 if (from_hypervisor) 3291 r = amdgpu_virt_request_full_gpu(adev, true); 3292 else 3293 r = amdgpu_virt_reset_gpu(adev); 3294 if (r) 3295 return r; 3296 3297 amdgpu_amdkfd_pre_reset(adev); 3298 3299 /* Resume IP prior to SMC */ 3300 r = amdgpu_device_ip_reinit_early_sriov(adev); 3301 if (r) 3302 goto error; 3303 3304 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3305 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3306 3307 r = amdgpu_device_fw_loading(adev); 3308 if (r) 3309 return r; 3310 3311 /* now we are okay to resume SMC/CP/SDMA */ 3312 r = amdgpu_device_ip_reinit_late_sriov(adev); 3313 if (r) 3314 goto error; 3315 3316 amdgpu_irq_gpu_reset_resume_helper(adev); 3317 r = amdgpu_ib_ring_tests(adev); 3318 amdgpu_amdkfd_post_reset(adev); 3319 3320 error: 3321 amdgpu_virt_init_data_exchange(adev); 3322 amdgpu_virt_release_full_gpu(adev, true); 3323 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3324 atomic_inc(&adev->vram_lost_counter); 3325 r = amdgpu_device_recover_vram(adev); 3326 } 3327 3328 return r; 3329 } 3330 3331 /** 3332 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3333 * 3334 * @adev: amdgpu device pointer 3335 * 3336 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3337 * a hung GPU. 3338 */ 3339 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 3340 { 3341 if (!amdgpu_device_ip_check_soft_reset(adev)) { 3342 DRM_INFO("Timeout, but no hardware hang detected.\n"); 3343 return false; 3344 } 3345 3346 if (amdgpu_gpu_recovery == 0) 3347 goto disabled; 3348 3349 if (amdgpu_sriov_vf(adev)) 3350 return true; 3351 3352 if (amdgpu_gpu_recovery == -1) { 3353 switch (adev->asic_type) { 3354 case CHIP_BONAIRE: 3355 case CHIP_HAWAII: 3356 case CHIP_TOPAZ: 3357 case CHIP_TONGA: 3358 case CHIP_FIJI: 3359 case CHIP_POLARIS10: 3360 case CHIP_POLARIS11: 3361 case CHIP_POLARIS12: 3362 case CHIP_VEGAM: 3363 case CHIP_VEGA20: 3364 case CHIP_VEGA10: 3365 case CHIP_VEGA12: 3366 break; 3367 default: 3368 goto disabled; 3369 } 3370 } 3371 3372 return true; 3373 3374 disabled: 3375 DRM_INFO("GPU recovery disabled.\n"); 3376 return false; 3377 } 3378 3379 3380 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 3381 struct amdgpu_job *job, 3382 bool *need_full_reset_arg) 3383 { 3384 int i, r = 0; 3385 bool need_full_reset = *need_full_reset_arg; 3386 3387 /* block all schedulers and reset given job's ring */ 3388 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3389 struct amdgpu_ring *ring = adev->rings[i]; 3390 3391 if (!ring || !ring->sched.thread) 3392 continue; 3393 3394 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3395 amdgpu_fence_driver_force_completion(ring); 3396 } 3397 3398 if(job) 3399 drm_sched_increase_karma(&job->base); 3400 3401 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 3402 if (!amdgpu_sriov_vf(adev)) { 3403 3404 if (!need_full_reset) 3405 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3406 3407 if (!need_full_reset) { 3408 amdgpu_device_ip_pre_soft_reset(adev); 3409 r = amdgpu_device_ip_soft_reset(adev); 3410 amdgpu_device_ip_post_soft_reset(adev); 3411 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3412 DRM_INFO("soft reset failed, will fallback to full reset!\n"); 3413 need_full_reset = true; 3414 } 3415 } 3416 3417 if (need_full_reset) 3418 r = amdgpu_device_ip_suspend(adev); 3419 3420 *need_full_reset_arg = 
need_full_reset; 3421 } 3422 3423 return r; 3424 } 3425 3426 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, 3427 struct list_head *device_list_handle, 3428 bool *need_full_reset_arg) 3429 { 3430 struct amdgpu_device *tmp_adev = NULL; 3431 bool need_full_reset = *need_full_reset_arg, vram_lost = false; 3432 int r = 0; 3433 3434 /* 3435 * ASIC reset has to be done on all HGMI hive nodes ASAP 3436 * to allow proper links negotiation in FW (within 1 sec) 3437 */ 3438 if (need_full_reset) { 3439 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3440 /* For XGMI run all resets in parallel to speed up the process */ 3441 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 3442 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) 3443 r = -EALREADY; 3444 } else 3445 r = amdgpu_asic_reset(tmp_adev); 3446 3447 if (r) { 3448 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", 3449 r, tmp_adev->ddev->unique); 3450 break; 3451 } 3452 } 3453 3454 /* For XGMI wait for all PSP resets to complete before proceed */ 3455 if (!r) { 3456 list_for_each_entry(tmp_adev, device_list_handle, 3457 gmc.xgmi.head) { 3458 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 3459 flush_work(&tmp_adev->xgmi_reset_work); 3460 r = tmp_adev->asic_reset_res; 3461 if (r) 3462 break; 3463 } 3464 } 3465 3466 list_for_each_entry(tmp_adev, device_list_handle, 3467 gmc.xgmi.head) { 3468 amdgpu_ras_reserve_bad_pages(tmp_adev); 3469 } 3470 } 3471 } 3472 3473 3474 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3475 if (need_full_reset) { 3476 /* post card */ 3477 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) 3478 DRM_WARN("asic atom init failed!"); 3479 3480 if (!r) { 3481 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 3482 r = amdgpu_device_ip_resume_phase1(tmp_adev); 3483 if (r) 3484 goto out; 3485 3486 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 3487 if (vram_lost) { 3488 DRM_INFO("VRAM is lost due to GPU reset!\n"); 3489 atomic_inc(&tmp_adev->vram_lost_counter); 3490 } 3491 3492 r = amdgpu_gtt_mgr_recover( 3493 &tmp_adev->mman.bdev.man[TTM_PL_TT]); 3494 if (r) 3495 goto out; 3496 3497 r = amdgpu_device_fw_loading(tmp_adev); 3498 if (r) 3499 return r; 3500 3501 r = amdgpu_device_ip_resume_phase2(tmp_adev); 3502 if (r) 3503 goto out; 3504 3505 if (vram_lost) 3506 amdgpu_device_fill_reset_magic(tmp_adev); 3507 3508 r = amdgpu_device_ip_late_init(tmp_adev); 3509 if (r) 3510 goto out; 3511 3512 /* must succeed. 
*/ 3513 amdgpu_ras_resume(tmp_adev); 3514 3515 /* Update PSP FW topology after reset */ 3516 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) 3517 r = amdgpu_xgmi_update_topology(hive, tmp_adev); 3518 } 3519 } 3520 3521 3522 out: 3523 if (!r) { 3524 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 3525 r = amdgpu_ib_ring_tests(tmp_adev); 3526 if (r) { 3527 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 3528 r = amdgpu_device_ip_suspend(tmp_adev); 3529 need_full_reset = true; 3530 r = -EAGAIN; 3531 goto end; 3532 } 3533 } 3534 3535 if (!r) 3536 r = amdgpu_device_recover_vram(tmp_adev); 3537 else 3538 tmp_adev->asic_reset_res = r; 3539 } 3540 3541 end: 3542 *need_full_reset_arg = need_full_reset; 3543 return r; 3544 } 3545 3546 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) 3547 { 3548 if (trylock) { 3549 if (!mutex_trylock(&adev->lock_reset)) 3550 return false; 3551 } else 3552 mutex_lock(&adev->lock_reset); 3553 3554 atomic_inc(&adev->gpu_reset_counter); 3555 adev->in_gpu_reset = 1; 3556 /* Block kfd: SRIOV would do it separately */ 3557 if (!amdgpu_sriov_vf(adev)) 3558 amdgpu_amdkfd_pre_reset(adev); 3559 3560 return true; 3561 } 3562 3563 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) 3564 { 3565 /*unlock kfd: SRIOV would do it separately */ 3566 if (!amdgpu_sriov_vf(adev)) 3567 amdgpu_amdkfd_post_reset(adev); 3568 amdgpu_vf_error_trans_all(adev); 3569 adev->in_gpu_reset = 0; 3570 mutex_unlock(&adev->lock_reset); 3571 } 3572 3573 3574 /** 3575 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 3576 * 3577 * @adev: amdgpu device pointer 3578 * @job: which job trigger hang 3579 * 3580 * Attempt to reset the GPU if it has hung (all asics). 3581 * Attempt to do soft-reset or full-reset and reinitialize Asic 3582 * Returns 0 for success or an error on failure. 3583 */ 3584 3585 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 3586 struct amdgpu_job *job) 3587 { 3588 struct list_head device_list, *device_list_handle = NULL; 3589 bool need_full_reset, job_signaled; 3590 struct amdgpu_hive_info *hive = NULL; 3591 struct amdgpu_device *tmp_adev = NULL; 3592 int i, r = 0; 3593 3594 need_full_reset = job_signaled = false; 3595 INIT_LIST_HEAD(&device_list); 3596 3597 dev_info(adev->dev, "GPU reset begin!\n"); 3598 3599 cancel_delayed_work_sync(&adev->delayed_init_work); 3600 3601 hive = amdgpu_get_xgmi_hive(adev, false); 3602 3603 /* 3604 * Here we trylock to avoid chain of resets executing from 3605 * either trigger by jobs on different adevs in XGMI hive or jobs on 3606 * different schedulers for same device while this TO handler is running. 3607 * We always reset all schedulers for device and all devices for XGMI 3608 * hive so that should take care of them too. 
3609 */ 3610 3611 if (hive && !mutex_trylock(&hive->reset_lock)) { 3612 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", 3613 job->base.id, hive->hive_id); 3614 return 0; 3615 } 3616 3617 /* Start with adev pre asic reset first for soft reset check.*/ 3618 if (!amdgpu_device_lock_adev(adev, !hive)) { 3619 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", 3620 job->base.id); 3621 return 0; 3622 } 3623 3624 /* Build list of devices to reset */ 3625 if (adev->gmc.xgmi.num_physical_nodes > 1) { 3626 if (!hive) { 3627 amdgpu_device_unlock_adev(adev); 3628 return -ENODEV; 3629 } 3630 3631 /* 3632 * In case we are in XGMI hive mode device reset is done for all the 3633 * nodes in the hive to retrain all XGMI links and hence the reset 3634 * sequence is executed in loop on all nodes. 3635 */ 3636 device_list_handle = &hive->device_list; 3637 } else { 3638 list_add_tail(&adev->gmc.xgmi.head, &device_list); 3639 device_list_handle = &device_list; 3640 } 3641 3642 /* block all schedulers and reset given job's ring */ 3643 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3644 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3645 struct amdgpu_ring *ring = tmp_adev->rings[i]; 3646 3647 if (!ring || !ring->sched.thread) 3648 continue; 3649 3650 drm_sched_stop(&ring->sched, &job->base); 3651 } 3652 } 3653 3654 3655 /* 3656 * Must check guilty signal here since after this point all old 3657 * HW fences are force signaled. 3658 * 3659 * job->base holds a reference to parent fence 3660 */ 3661 if (job && job->base.s_fence->parent && 3662 dma_fence_is_signaled(job->base.s_fence->parent)) 3663 job_signaled = true; 3664 3665 if (!amdgpu_device_ip_need_full_reset(adev)) 3666 device_list_handle = &device_list; 3667 3668 if (job_signaled) { 3669 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 3670 goto skip_hw_reset; 3671 } 3672 3673 3674 /* Guilty job will be freed after this*/ 3675 r = amdgpu_device_pre_asic_reset(adev, 3676 job, 3677 &need_full_reset); 3678 if (r) { 3679 /*TODO Should we stop ?*/ 3680 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", 3681 r, adev->ddev->unique); 3682 adev->asic_reset_res = r; 3683 } 3684 3685 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 3686 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3687 3688 if (tmp_adev == adev) 3689 continue; 3690 3691 amdgpu_device_lock_adev(tmp_adev, false); 3692 r = amdgpu_device_pre_asic_reset(tmp_adev, 3693 NULL, 3694 &need_full_reset); 3695 /*TODO Should we stop ?*/ 3696 if (r) { 3697 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", 3698 r, tmp_adev->ddev->unique); 3699 tmp_adev->asic_reset_res = r; 3700 } 3701 } 3702 3703 /* Actual ASIC resets if needed.*/ 3704 /* TODO Implement XGMI hive reset logic for SRIOV */ 3705 if (amdgpu_sriov_vf(adev)) { 3706 r = amdgpu_device_reset_sriov(adev, job ? 
false : true); 3707 if (r) 3708 adev->asic_reset_res = r; 3709 } else { 3710 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); 3711 if (r == -EAGAIN) 3712 goto retry; 3713 } 3714 3715 skip_hw_reset: 3716 3717 /* Post ASIC reset for all devs. */ 3718 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3719 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3720 struct amdgpu_ring *ring = tmp_adev->rings[i]; 3721 3722 if (!ring || !ring->sched.thread) 3723 continue; 3724 3725 /* No point to resubmit jobs if we didn't HW reset */ 3726 if (!tmp_adev->asic_reset_res && !job_signaled) 3727 drm_sched_resubmit_jobs(&ring->sched); 3728 3729 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); 3730 } 3731 3732 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { 3733 drm_helper_resume_force_mode(tmp_adev->ddev); 3734 } 3735 3736 tmp_adev->asic_reset_res = 0; 3737 3738 if (r) { 3739 /* bad news, how to tell it to userspace? */ 3740 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); 3741 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 3742 } else { 3743 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); 3744 } 3745 3746 amdgpu_device_unlock_adev(tmp_adev); 3747 } 3748 3749 if (hive) 3750 mutex_unlock(&hive->reset_lock); 3751 3752 if (r) 3753 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 3754 return r; 3755 } 3756 3757 /** 3758 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot 3759 * 3760 * @adev: amdgpu_device pointer 3761 * 3762 * Fetches and stores in the driver the PCIE capabilities (gen speed 3763 * and lanes) of the slot the device is in. Handles APUs and 3764 * virtualized environments where PCIE config space may not be available.
3765 */ 3766 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3767 { 3768 struct pci_dev *pdev; 3769 enum pci_bus_speed speed_cap, platform_speed_cap; 3770 enum pcie_link_width platform_link_width; 3771 3772 if (amdgpu_pcie_gen_cap) 3773 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 3774 3775 if (amdgpu_pcie_lane_cap) 3776 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 3777 3778 /* covers APUs as well */ 3779 if (pci_is_root_bus(adev->pdev->bus)) { 3780 if (adev->pm.pcie_gen_mask == 0) 3781 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 3782 if (adev->pm.pcie_mlw_mask == 0) 3783 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 3784 return; 3785 } 3786 3787 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 3788 return; 3789 3790 pcie_bandwidth_available(adev->pdev, NULL, 3791 &platform_speed_cap, &platform_link_width); 3792 3793 if (adev->pm.pcie_gen_mask == 0) { 3794 /* asic caps */ 3795 pdev = adev->pdev; 3796 speed_cap = pcie_get_speed_cap(pdev); 3797 if (speed_cap == PCI_SPEED_UNKNOWN) { 3798 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3799 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3800 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3801 } else { 3802 if (speed_cap == PCIE_SPEED_16_0GT) 3803 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3804 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3805 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3806 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 3807 else if (speed_cap == PCIE_SPEED_8_0GT) 3808 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3809 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3810 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3811 else if (speed_cap == PCIE_SPEED_5_0GT) 3812 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3813 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 3814 else 3815 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 3816 } 3817 /* platform caps */ 3818 if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 3819 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3820 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3821 } else { 3822 if (platform_speed_cap == PCIE_SPEED_16_0GT) 3823 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3824 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3825 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3826 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 3827 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 3828 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3829 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3830 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 3831 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 3832 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3833 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3834 else 3835 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 3836 3837 } 3838 } 3839 if (adev->pm.pcie_mlw_mask == 0) { 3840 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 3841 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 3842 } else { 3843 switch (platform_link_width) { 3844 case PCIE_LNK_X32: 3845 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 3846 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3847 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3848 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3849 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3850 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3851 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3852 break; 3853 case PCIE_LNK_X16: 3854 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3855 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3856 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 
3857 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3858 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3859 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3860 break; 3861 case PCIE_LNK_X12: 3862 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3863 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3864 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3865 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3866 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3867 break; 3868 case PCIE_LNK_X8: 3869 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3870 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3871 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3872 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3873 break; 3874 case PCIE_LNK_X4: 3875 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3876 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3877 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3878 break; 3879 case PCIE_LNK_X2: 3880 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3881 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3882 break; 3883 case PCIE_LNK_X1: 3884 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 3885 break; 3886 default: 3887 break; 3888 } 3889 } 3890 } 3891 } 3892 3893
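/*
 * Illustrative sketch (not part of the driver): once
 * amdgpu_device_get_pcie_info() has populated the masks, callers such as
 * the power management code can test individual capability bits, or the
 * masks can be forced outright via the amdgpu_pcie_gen_cap and
 * amdgpu_pcie_lane_cap module parameters. For example, picking the widest
 * advertised platform link width:
 *
 *	unsigned int width = 1;
 *
 *	if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
 *		width = 16;
 *	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
 *		width = 8;
 *	else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
 *		width = 4;
 */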