1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 
39 #include <drm/drm_aperture.h>
40 #include <drm/drm_atomic_helper.h>
41 #include <drm/drm_crtc_helper.h>
42 #include <drm/drm_fb_helper.h>
43 #include <drm/drm_probe_helper.h>
44 #include <drm/amdgpu_drm.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 
77 #include <linux/suspend.h>
78 #include <drm/task_barrier.h>
79 #include <linux/pm_runtime.h>
80 
81 #include <drm/drm_drv.h>
82 
83 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
85 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
86 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
87 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
88 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
90 
91 #define AMDGPU_RESUME_MS		2000
92 #define AMDGPU_MAX_RETRY_LIMIT		2
93 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
94 
95 static const struct drm_driver amdgpu_kms_driver;
96 
97 const char *amdgpu_asic_name[] = {
98 	"TAHITI",
99 	"PITCAIRN",
100 	"VERDE",
101 	"OLAND",
102 	"HAINAN",
103 	"BONAIRE",
104 	"KAVERI",
105 	"KABINI",
106 	"HAWAII",
107 	"MULLINS",
108 	"TOPAZ",
109 	"TONGA",
110 	"FIJI",
111 	"CARRIZO",
112 	"STONEY",
113 	"POLARIS10",
114 	"POLARIS11",
115 	"POLARIS12",
116 	"VEGAM",
117 	"VEGA10",
118 	"VEGA12",
119 	"VEGA20",
120 	"RAVEN",
121 	"ARCTURUS",
122 	"RENOIR",
123 	"ALDEBARAN",
124 	"NAVI10",
125 	"CYAN_SKILLFISH",
126 	"NAVI14",
127 	"NAVI12",
128 	"SIENNA_CICHLID",
129 	"NAVY_FLOUNDER",
130 	"VANGOGH",
131 	"DIMGREY_CAVEFISH",
132 	"BEIGE_GOBY",
133 	"YELLOW_CARP",
134 	"IP DISCOVERY",
135 	"LAST",
136 };
137 
138 /**
139  * DOC: pcie_replay_count
140  *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and the NAKs received.
145  */
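/*
 * Example (illustrative only; the card index depends on the system):
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 */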
146 
147 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
148 		struct device_attribute *attr, char *buf)
149 {
150 	struct drm_device *ddev = dev_get_drvdata(dev);
151 	struct amdgpu_device *adev = drm_to_adev(ddev);
152 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
153 
154 	return sysfs_emit(buf, "%llu\n", cnt);
155 }
156 
157 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
158 		amdgpu_device_get_pcie_replay_count, NULL);
159 
160 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
161 
162 /**
163  * DOC: product_name
164  *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
170  */
171 
172 static ssize_t amdgpu_device_get_product_name(struct device *dev,
173 		struct device_attribute *attr, char *buf)
174 {
175 	struct drm_device *ddev = dev_get_drvdata(dev);
176 	struct amdgpu_device *adev = drm_to_adev(ddev);
177 
178 	return sysfs_emit(buf, "%s\n", adev->product_name);
179 }
180 
181 static DEVICE_ATTR(product_name, S_IRUGO,
182 		amdgpu_device_get_product_name, NULL);
183 
184 /**
185  * DOC: product_number
186  *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
192  */
193 
194 static ssize_t amdgpu_device_get_product_number(struct device *dev,
195 		struct device_attribute *attr, char *buf)
196 {
197 	struct drm_device *ddev = dev_get_drvdata(dev);
198 	struct amdgpu_device *adev = drm_to_adev(ddev);
199 
200 	return sysfs_emit(buf, "%s\n", adev->product_number);
201 }
202 
203 static DEVICE_ATTR(product_number, S_IRUGO,
204 		amdgpu_device_get_product_number, NULL);
205 
206 /**
207  * DOC: serial_number
208  *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
214  */
215 
216 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
217 		struct device_attribute *attr, char *buf)
218 {
219 	struct drm_device *ddev = dev_get_drvdata(dev);
220 	struct amdgpu_device *adev = drm_to_adev(ddev);
221 
222 	return sysfs_emit(buf, "%s\n", adev->serial);
223 }
224 
225 static DEVICE_ATTR(serial_number, S_IRUGO,
226 		amdgpu_device_get_serial_number, NULL);
227 
228 /**
229  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
230  *
231  * @dev: drm_device pointer
232  *
233  * Returns true if the device is a dGPU with ATPX power control,
234  * otherwise return false.
235  */
236 bool amdgpu_device_supports_px(struct drm_device *dev)
237 {
238 	struct amdgpu_device *adev = drm_to_adev(dev);
239 
240 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
241 		return true;
242 	return false;
243 }
244 
245 /**
246  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
247  *
248  * @dev: drm_device pointer
249  *
250  * Returns true if the device is a dGPU with ACPI power control,
251  * otherwise return false.
252  */
253 bool amdgpu_device_supports_boco(struct drm_device *dev)
254 {
255 	struct amdgpu_device *adev = drm_to_adev(dev);
256 
257 	if (adev->has_pr3 ||
258 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
259 		return true;
260 	return false;
261 }
262 
263 /**
264  * amdgpu_device_supports_baco - Does the device support BACO
265  *
266  * @dev: drm_device pointer
267  *
 * Returns true if the device supports BACO,
269  * otherwise return false.
270  */
271 bool amdgpu_device_supports_baco(struct drm_device *dev)
272 {
273 	struct amdgpu_device *adev = drm_to_adev(dev);
274 
275 	return amdgpu_asic_supports_baco(adev);
276 }
277 
278 /**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * Smart Shift support
281  *
282  * @dev: drm_device pointer
283  *
284  * Returns true if the device is a dGPU with Smart Shift support,
285  * otherwise returns false.
286  */
287 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
288 {
289 	return (amdgpu_device_supports_boco(dev) &&
290 		amdgpu_acpi_is_power_shift_control_supported());
291 }
292 
293 /*
294  * VRAM access helper functions
295  */
296 
297 /**
298  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
299  *
300  * @adev: amdgpu_device pointer
301  * @pos: offset of the buffer in vram
302  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; @buf must point to a buffer of at least @size bytes
304  * @write: true - write to vram, otherwise - read from vram
305  */
306 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
307 			     void *buf, size_t size, bool write)
308 {
309 	unsigned long flags;
310 	uint32_t hi = ~0, tmp = 0;
311 	uint32_t *data = buf;
312 	uint64_t last;
313 	int idx;
314 
315 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
316 		return;
317 
318 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
319 
320 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
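	/*
	 * Tunnel the access through the MM_INDEX/MM_DATA window: the low
	 * 31 bits of the offset go into MM_INDEX (bit 31 selects the memory
	 * aperture), the upper bits go into MM_INDEX_HI, and each dword is
	 * then transferred through MM_DATA.
	 */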
321 	for (last = pos + size; pos < last; pos += 4) {
322 		tmp = pos >> 31;
323 
324 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
325 		if (tmp != hi) {
326 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
327 			hi = tmp;
328 		}
329 		if (write)
330 			WREG32_NO_KIQ(mmMM_DATA, *data++);
331 		else
332 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
333 	}
334 
335 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
336 	drm_dev_exit(idx);
337 }
338 
339 /**
 * amdgpu_device_aper_access - access vram through the vram aperture
341  *
342  * @adev: amdgpu_device pointer
343  * @pos: offset of the buffer in vram
344  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; @buf must point to a buffer of at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes that were transferred through the aperture.
349  */
350 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
351 				 void *buf, size_t size, bool write)
352 {
353 #ifdef CONFIG_64BIT
354 	void __iomem *addr;
355 	size_t count = 0;
356 	uint64_t last;
357 
358 	if (!adev->mman.aper_base_kaddr)
359 		return 0;
360 
361 	last = min(pos + size, adev->gmc.visible_vram_size);
362 	if (last > pos) {
363 		addr = adev->mman.aper_base_kaddr + pos;
364 		count = last - pos;
365 
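		/*
		 * Keep the HDP cache coherent: flush it after CPU writes so
		 * the GPU sees the new data, and invalidate it before CPU
		 * reads so we don't pick up stale data.
		 */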
366 		if (write) {
367 			memcpy_toio(addr, buf, count);
368 			mb();
369 			amdgpu_device_flush_hdp(adev, NULL);
370 		} else {
371 			amdgpu_device_invalidate_hdp(adev, NULL);
372 			mb();
373 			memcpy_fromio(buf, addr, count);
374 		}
375 
376 	}
377 
378 	return count;
379 #else
380 	return 0;
381 #endif
382 }
383 
384 /**
385  * amdgpu_device_vram_access - read/write a buffer in vram
386  *
387  * @adev: amdgpu_device pointer
388  * @pos: offset of the buffer in vram
389  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; @buf must point to a buffer of at least @size bytes
391  * @write: true - write to vram, otherwise - read from vram
392  */
393 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
394 			       void *buf, size_t size, bool write)
395 {
396 	size_t count;
397 
	/* try using the VRAM aperture to access VRAM first */
399 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
400 	size -= count;
401 	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of VRAM */
403 		pos += count;
404 		buf += count;
405 		amdgpu_device_mm_access(adev, pos, buf, size, write);
406 	}
407 }
408 
409 /*
410  * register access helper functions.
411  */
412 
413 /* Check if hw access should be skipped because of hotplug or device error */
414 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
415 {
416 	if (adev->no_hw_access)
417 		return true;
418 
419 #ifdef CONFIG_LOCKDEP
420 	/*
421 	 * This is a bit complicated to understand, so worth a comment. What we assert
422 	 * here is that the GPU reset is not running on another thread in parallel.
423 	 *
	 * For this we trylock the read side of the reset semaphore; if that succeeds
	 * we know that the reset is not running in parallel.
426 	 *
427 	 * If the trylock fails we assert that we are either already holding the read
428 	 * side of the lock or are the reset thread itself and hold the write side of
429 	 * the lock.
430 	 */
431 	if (in_task()) {
432 		if (down_read_trylock(&adev->reset_domain->sem))
433 			up_read(&adev->reset_domain->sem);
434 		else
435 			lockdep_assert_held(&adev->reset_domain->sem);
436 	}
437 #endif
438 	return false;
439 }
440 
441 /**
442  * amdgpu_device_rreg - read a memory mapped IO or indirect register
443  *
444  * @adev: amdgpu_device pointer
445  * @reg: dword aligned register offset
446  * @acc_flags: access flags which require special behavior
447  *
448  * Returns the 32 bit value from the offset specified.
449  */
450 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
451 			    uint32_t reg, uint32_t acc_flags)
452 {
453 	uint32_t ret;
454 
455 	if (amdgpu_device_skip_hw_access(adev))
456 		return 0;
457 
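	/*
	 * Registers inside the MMIO BAR are read directly (or via the KIQ
	 * when running as an SR-IOV VF at runtime); offsets beyond the BAR
	 * go through the PCIE indirect access path.
	 */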
458 	if ((reg * 4) < adev->rmmio_size) {
459 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
460 		    amdgpu_sriov_runtime(adev) &&
461 		    down_read_trylock(&adev->reset_domain->sem)) {
462 			ret = amdgpu_kiq_rreg(adev, reg);
463 			up_read(&adev->reset_domain->sem);
464 		} else {
465 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
466 		}
467 	} else {
468 		ret = adev->pcie_rreg(adev, reg * 4);
469 	}
470 
471 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
472 
473 	return ret;
474 }
475 
476 /*
 * MMIO register read with byte offset helper function
 * @offset: byte offset from MMIO start
 *
 */
481 
482 /**
483  * amdgpu_mm_rreg8 - read a memory mapped IO register
484  *
485  * @adev: amdgpu_device pointer
486  * @offset: byte aligned register offset
487  *
488  * Returns the 8 bit value from the offset specified.
489  */
490 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
491 {
492 	if (amdgpu_device_skip_hw_access(adev))
493 		return 0;
494 
495 	if (offset < adev->rmmio_size)
496 		return (readb(adev->rmmio + offset));
497 	BUG();
498 }
499 
500 /*
 * MMIO register write with byte offset helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
504  *
505 */
506 /**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
508  *
509  * @adev: amdgpu_device pointer
510  * @offset: byte aligned register offset
511  * @value: 8 bit value to write
512  *
513  * Writes the value specified to the offset specified.
514  */
515 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
516 {
517 	if (amdgpu_device_skip_hw_access(adev))
518 		return;
519 
520 	if (offset < adev->rmmio_size)
521 		writeb(value, adev->rmmio + offset);
522 	else
523 		BUG();
524 }
525 
526 /**
527  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
528  *
529  * @adev: amdgpu_device pointer
530  * @reg: dword aligned register offset
531  * @v: 32 bit value to write to the register
532  * @acc_flags: access flags which require special behavior
533  *
534  * Writes the value specified to the offset specified.
535  */
536 void amdgpu_device_wreg(struct amdgpu_device *adev,
537 			uint32_t reg, uint32_t v,
538 			uint32_t acc_flags)
539 {
540 	if (amdgpu_device_skip_hw_access(adev))
541 		return;
542 
543 	if ((reg * 4) < adev->rmmio_size) {
544 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
545 		    amdgpu_sriov_runtime(adev) &&
546 		    down_read_trylock(&adev->reset_domain->sem)) {
547 			amdgpu_kiq_wreg(adev, reg, v);
548 			up_read(&adev->reset_domain->sem);
549 		} else {
550 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
551 		}
552 	} else {
553 		adev->pcie_wreg(adev, reg * 4, v);
554 	}
555 
556 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
557 }
558 
559 /**
560  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
561  *
562  * @adev: amdgpu_device pointer
563  * @reg: mmio/rlc register
564  * @v: value to write
565  *
 * This function is invoked only for debugfs register access.
567  */
568 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
569 			     uint32_t reg, uint32_t v)
570 {
571 	if (amdgpu_device_skip_hw_access(adev))
572 		return;
573 
574 	if (amdgpu_sriov_fullaccess(adev) &&
575 	    adev->gfx.rlc.funcs &&
576 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
577 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
578 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
579 	} else if ((reg * 4) >= adev->rmmio_size) {
580 		adev->pcie_wreg(adev, reg * 4, v);
581 	} else {
582 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
583 	}
584 }
585 
586 /**
587  * amdgpu_mm_rdoorbell - read a doorbell dword
588  *
589  * @adev: amdgpu_device pointer
590  * @index: doorbell index
591  *
592  * Returns the value in the doorbell aperture at the
593  * requested doorbell index (CIK).
594  */
595 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
596 {
597 	if (amdgpu_device_skip_hw_access(adev))
598 		return 0;
599 
600 	if (index < adev->doorbell.num_doorbells) {
601 		return readl(adev->doorbell.ptr + index);
602 	} else {
603 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
604 		return 0;
605 	}
606 }
607 
608 /**
609  * amdgpu_mm_wdoorbell - write a doorbell dword
610  *
611  * @adev: amdgpu_device pointer
612  * @index: doorbell index
613  * @v: value to write
614  *
615  * Writes @v to the doorbell aperture at the
616  * requested doorbell index (CIK).
617  */
618 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
619 {
620 	if (amdgpu_device_skip_hw_access(adev))
621 		return;
622 
623 	if (index < adev->doorbell.num_doorbells) {
624 		writel(v, adev->doorbell.ptr + index);
625 	} else {
626 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
627 	}
628 }
629 
630 /**
631  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
632  *
633  * @adev: amdgpu_device pointer
634  * @index: doorbell index
635  *
636  * Returns the value in the doorbell aperture at the
637  * requested doorbell index (VEGA10+).
638  */
639 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
640 {
641 	if (amdgpu_device_skip_hw_access(adev))
642 		return 0;
643 
644 	if (index < adev->doorbell.num_doorbells) {
645 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
646 	} else {
647 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
648 		return 0;
649 	}
650 }
651 
652 /**
653  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
654  *
655  * @adev: amdgpu_device pointer
656  * @index: doorbell index
657  * @v: value to write
658  *
659  * Writes @v to the doorbell aperture at the
660  * requested doorbell index (VEGA10+).
661  */
662 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
663 {
664 	if (amdgpu_device_skip_hw_access(adev))
665 		return;
666 
667 	if (index < adev->doorbell.num_doorbells) {
668 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
669 	} else {
670 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
671 	}
672 }
673 
674 /**
675  * amdgpu_device_indirect_rreg - read an indirect register
676  *
677  * @adev: amdgpu_device pointer
678  * @pcie_index: mmio register offset
679  * @pcie_data: mmio register offset
680  * @reg_addr: indirect register address to read from
681  *
682  * Returns the value of indirect register @reg_addr
683  */
684 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
685 				u32 pcie_index, u32 pcie_data,
686 				u32 reg_addr)
687 {
688 	unsigned long flags;
689 	u32 r;
690 	void __iomem *pcie_index_offset;
691 	void __iomem *pcie_data_offset;
692 
693 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
694 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
695 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
696 
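	/*
	 * Select the target register through the index register (the readl
	 * posts the write), then fetch its value from the data register.
	 */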
697 	writel(reg_addr, pcie_index_offset);
698 	readl(pcie_index_offset);
699 	r = readl(pcie_data_offset);
700 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
701 
702 	return r;
703 }
704 
705 /**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
707  *
708  * @adev: amdgpu_device pointer
709  * @pcie_index: mmio register offset
710  * @pcie_data: mmio register offset
711  * @reg_addr: indirect register address to read from
712  *
713  * Returns the value of indirect register @reg_addr
714  */
715 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
716 				  u32 pcie_index, u32 pcie_data,
717 				  u32 reg_addr)
718 {
719 	unsigned long flags;
720 	u64 r;
721 	void __iomem *pcie_index_offset;
722 	void __iomem *pcie_data_offset;
723 
724 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
725 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
726 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
727 
728 	/* read low 32 bits */
729 	writel(reg_addr, pcie_index_offset);
730 	readl(pcie_index_offset);
731 	r = readl(pcie_data_offset);
732 	/* read high 32 bits */
733 	writel(reg_addr + 4, pcie_index_offset);
734 	readl(pcie_index_offset);
735 	r |= ((u64)readl(pcie_data_offset) << 32);
736 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
737 
738 	return r;
739 }
740 
741 /**
 * amdgpu_device_indirect_wreg - write an indirect register
743  *
744  * @adev: amdgpu_device pointer
745  * @pcie_index: mmio register offset
746  * @pcie_data: mmio register offset
747  * @reg_addr: indirect register offset
748  * @reg_data: indirect register data
749  *
750  */
751 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
752 				 u32 pcie_index, u32 pcie_data,
753 				 u32 reg_addr, u32 reg_data)
754 {
755 	unsigned long flags;
756 	void __iomem *pcie_index_offset;
757 	void __iomem *pcie_data_offset;
758 
759 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
760 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
761 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
762 
763 	writel(reg_addr, pcie_index_offset);
764 	readl(pcie_index_offset);
765 	writel(reg_data, pcie_data_offset);
766 	readl(pcie_data_offset);
767 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
768 }
769 
770 /**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
772  *
773  * @adev: amdgpu_device pointer
774  * @pcie_index: mmio register offset
775  * @pcie_data: mmio register offset
776  * @reg_addr: indirect register offset
777  * @reg_data: indirect register data
778  *
779  */
780 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
781 				   u32 pcie_index, u32 pcie_data,
782 				   u32 reg_addr, u64 reg_data)
783 {
784 	unsigned long flags;
785 	void __iomem *pcie_index_offset;
786 	void __iomem *pcie_data_offset;
787 
788 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
789 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
790 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
791 
792 	/* write low 32 bits */
793 	writel(reg_addr, pcie_index_offset);
794 	readl(pcie_index_offset);
795 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
796 	readl(pcie_data_offset);
797 	/* write high 32 bits */
798 	writel(reg_addr + 4, pcie_index_offset);
799 	readl(pcie_index_offset);
800 	writel((u32)(reg_data >> 32), pcie_data_offset);
801 	readl(pcie_data_offset);
802 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
803 }
804 
805 /**
806  * amdgpu_invalid_rreg - dummy reg read function
807  *
808  * @adev: amdgpu_device pointer
809  * @reg: offset of register
810  *
811  * Dummy register read function.  Used for register blocks
812  * that certain asics don't have (all asics).
813  * Returns the value in the register.
814  */
815 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
816 {
817 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
818 	BUG();
819 	return 0;
820 }
821 
822 /**
823  * amdgpu_invalid_wreg - dummy reg write function
824  *
825  * @adev: amdgpu_device pointer
826  * @reg: offset of register
827  * @v: value to write to the register
828  *
 * Dummy register write function.  Used for register blocks
830  * that certain asics don't have (all asics).
831  */
832 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
833 {
834 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
835 		  reg, v);
836 	BUG();
837 }
838 
839 /**
840  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
841  *
842  * @adev: amdgpu_device pointer
843  * @reg: offset of register
844  *
845  * Dummy register read function.  Used for register blocks
846  * that certain asics don't have (all asics).
847  * Returns the value in the register.
848  */
849 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
850 {
851 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
852 	BUG();
853 	return 0;
854 }
855 
856 /**
857  * amdgpu_invalid_wreg64 - dummy reg write function
858  *
859  * @adev: amdgpu_device pointer
860  * @reg: offset of register
861  * @v: value to write to the register
862  *
 * Dummy register write function.  Used for register blocks
864  * that certain asics don't have (all asics).
865  */
866 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
867 {
868 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
869 		  reg, v);
870 	BUG();
871 }
872 
873 /**
874  * amdgpu_block_invalid_rreg - dummy reg read function
875  *
876  * @adev: amdgpu_device pointer
877  * @block: offset of instance
878  * @reg: offset of register
879  *
880  * Dummy register read function.  Used for register blocks
881  * that certain asics don't have (all asics).
882  * Returns the value in the register.
883  */
884 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
885 					  uint32_t block, uint32_t reg)
886 {
887 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
888 		  reg, block);
889 	BUG();
890 	return 0;
891 }
892 
893 /**
894  * amdgpu_block_invalid_wreg - dummy reg write function
895  *
896  * @adev: amdgpu_device pointer
897  * @block: offset of instance
898  * @reg: offset of register
899  * @v: value to write to the register
900  *
 * Dummy register write function.  Used for register blocks
902  * that certain asics don't have (all asics).
903  */
904 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
905 				      uint32_t block,
906 				      uint32_t reg, uint32_t v)
907 {
908 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
909 		  reg, block, v);
910 	BUG();
911 }
912 
913 /**
914  * amdgpu_device_asic_init - Wrapper for atom asic_init
915  *
916  * @adev: amdgpu_device pointer
917  *
918  * Does any asic specific work and then calls atom asic init.
919  */
920 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
921 {
922 	amdgpu_asic_pre_asic_init(adev);
923 
924 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
925 		return amdgpu_atomfirmware_asic_init(adev, true);
926 	else
927 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
928 }
929 
930 /**
931  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
932  *
933  * @adev: amdgpu_device pointer
934  *
935  * Allocates a scratch page of VRAM for use by various things in the
936  * driver.
937  */
938 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
939 {
940 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
941 				       AMDGPU_GEM_DOMAIN_VRAM |
942 				       AMDGPU_GEM_DOMAIN_GTT,
943 				       &adev->mem_scratch.robj,
944 				       &adev->mem_scratch.gpu_addr,
945 				       (void **)&adev->mem_scratch.ptr);
946 }
947 
948 /**
949  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
950  *
951  * @adev: amdgpu_device pointer
952  *
953  * Frees the VRAM scratch page.
954  */
955 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
956 {
957 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
958 }
959 
960 /**
961  * amdgpu_device_program_register_sequence - program an array of registers.
962  *
963  * @adev: amdgpu_device pointer
964  * @registers: pointer to the register array
965  * @array_size: size of the register array
966  *
 * Programs an array of registers with AND and OR masks.
968  * This is a helper for setting golden registers.
969  */
970 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
971 					     const u32 *registers,
972 					     const u32 array_size)
973 {
974 	u32 tmp, reg, and_mask, or_mask;
975 	int i;
976 
977 	if (array_size % 3)
978 		return;
979 
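	/* the array is a sequence of {offset, and_mask, or_mask} triplets */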
	for (i = 0; i < array_size; i += 3) {
981 		reg = registers[i + 0];
982 		and_mask = registers[i + 1];
983 		or_mask = registers[i + 2];
984 
985 		if (and_mask == 0xffffffff) {
986 			tmp = or_mask;
987 		} else {
988 			tmp = RREG32(reg);
989 			tmp &= ~and_mask;
990 			if (adev->family >= AMDGPU_FAMILY_AI)
991 				tmp |= (or_mask & and_mask);
992 			else
993 				tmp |= or_mask;
994 		}
995 		WREG32(reg, tmp);
996 	}
997 }
998 
999 /**
1000  * amdgpu_device_pci_config_reset - reset the GPU
1001  *
1002  * @adev: amdgpu_device pointer
1003  *
1004  * Resets the GPU using the pci config reset sequence.
1005  * Only applicable to asics prior to vega10.
1006  */
1007 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1008 {
1009 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1010 }
1011 
1012 /**
1013  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1014  *
1015  * @adev: amdgpu_device pointer
1016  *
1017  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1018  */
1019 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1020 {
1021 	return pci_reset_function(adev->pdev);
1022 }
1023 
1024 /*
1025  * GPU doorbell aperture helpers function.
1026  */
1027 /**
1028  * amdgpu_device_doorbell_init - Init doorbell driver information.
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Init doorbell driver information (CIK)
1033  * Returns 0 on success, error on failure.
1034  */
1035 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1036 {
1037 
1038 	/* No doorbell on SI hardware generation */
1039 	if (adev->asic_type < CHIP_BONAIRE) {
1040 		adev->doorbell.base = 0;
1041 		adev->doorbell.size = 0;
1042 		adev->doorbell.num_doorbells = 0;
1043 		adev->doorbell.ptr = NULL;
1044 		return 0;
1045 	}
1046 
1047 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1048 		return -EINVAL;
1049 
1050 	amdgpu_asic_init_doorbell_index(adev);
1051 
1052 	/* doorbell bar mapping */
1053 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1054 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1055 
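	/* With MES enabled the whole doorbell BAR is usable; otherwise limit
	 * the mapping to the statically assigned doorbell range.
	 */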
1056 	if (adev->enable_mes) {
1057 		adev->doorbell.num_doorbells =
1058 			adev->doorbell.size / sizeof(u32);
1059 	} else {
1060 		adev->doorbell.num_doorbells =
1061 			min_t(u32, adev->doorbell.size / sizeof(u32),
1062 			      adev->doorbell_index.max_assignment+1);
1063 		if (adev->doorbell.num_doorbells == 0)
1064 			return -EINVAL;
1065 
		/* For Vega, reserve and map two pages on the doorbell BAR since
		 * the SDMA paging queue doorbells use the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page, so with the paging queue
		 * enabled num_doorbells is extended by one page (0x400 dwords).
		 */
1072 		if (adev->asic_type >= CHIP_VEGA10)
1073 			adev->doorbell.num_doorbells += 0x400;
1074 	}
1075 
1076 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
1077 				     adev->doorbell.num_doorbells *
1078 				     sizeof(u32));
1079 	if (adev->doorbell.ptr == NULL)
1080 		return -ENOMEM;
1081 
1082 	return 0;
1083 }
1084 
1085 /**
1086  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1087  *
1088  * @adev: amdgpu_device pointer
1089  *
1090  * Tear down doorbell driver information (CIK)
1091  */
1092 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1093 {
1094 	iounmap(adev->doorbell.ptr);
1095 	adev->doorbell.ptr = NULL;
1096 }
1097 
1098 
1099 
1100 /*
1101  * amdgpu_device_wb_*()
1102  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1104  */
1105 
1106 /**
1107  * amdgpu_device_wb_fini - Disable Writeback and free memory
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Disables Writeback and frees the Writeback memory (all asics).
1112  * Used at driver shutdown.
1113  */
1114 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1115 {
1116 	if (adev->wb.wb_obj) {
1117 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1118 				      &adev->wb.gpu_addr,
1119 				      (void **)&adev->wb.wb);
1120 		adev->wb.wb_obj = NULL;
1121 	}
1122 }
1123 
1124 /**
1125  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1126  *
1127  * @adev: amdgpu_device pointer
1128  *
1129  * Initializes writeback and allocates writeback memory (all asics).
1130  * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
1132  */
1133 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1134 {
1135 	int r;
1136 
1137 	if (adev->wb.wb_obj == NULL) {
1138 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1139 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1140 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1141 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1142 					    (void **)&adev->wb.wb);
1143 		if (r) {
1144 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1145 			return r;
1146 		}
1147 
1148 		adev->wb.num_wb = AMDGPU_MAX_WB;
1149 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1150 
1151 		/* clear wb memory */
1152 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1153 	}
1154 
1155 	return 0;
1156 }
1157 
1158 /**
1159  * amdgpu_device_wb_get - Allocate a wb entry
1160  *
1161  * @adev: amdgpu_device pointer
1162  * @wb: wb index
1163  *
1164  * Allocate a wb slot for use by the driver (all asics).
1165  * Returns 0 on success or -EINVAL on failure.
1166  */
1167 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1168 {
1169 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1170 
1171 	if (offset < adev->wb.num_wb) {
1172 		__set_bit(offset, adev->wb.used);
1173 		*wb = offset << 3; /* convert to dw offset */
1174 		return 0;
1175 	} else {
1176 		return -EINVAL;
1177 	}
1178 }
1179 
1180 /**
1181  * amdgpu_device_wb_free - Free a wb entry
1182  *
1183  * @adev: amdgpu_device pointer
1184  * @wb: wb index
1185  *
1186  * Free a wb slot allocated for use by the driver (all asics)
1187  */
1188 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1189 {
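	/* @wb is a dword offset; convert it back to the 256-bit slot index */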
1190 	wb >>= 3;
1191 	if (wb < adev->wb.num_wb)
1192 		__clear_bit(wb, adev->wb.used);
1193 }
1194 
1195 /**
1196  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1197  *
1198  * @adev: amdgpu_device pointer
1199  *
1200  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
1202  * driver loading by returning -ENODEV.
1203  */
1204 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1205 {
1206 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1207 	struct pci_bus *root;
1208 	struct resource *res;
1209 	unsigned i;
1210 	u16 cmd;
1211 	int r;
1212 
1213 	/* Bypass for VF */
1214 	if (amdgpu_sriov_vf(adev))
1215 		return 0;
1216 
1217 	/* skip if the bios has already enabled large BAR */
1218 	if (adev->gmc.real_vram_size &&
1219 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1220 		return 0;
1221 
1222 	/* Check if the root BUS has 64bit memory resources */
1223 	root = adev->pdev->bus;
1224 	while (root->parent)
1225 		root = root->parent;
1226 
1227 	pci_bus_for_each_resource(root, res, i) {
1228 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1229 		    res->start > 0x100000000ull)
1230 			break;
1231 	}
1232 
1233 	/* Trying to resize is pointless without a root hub window above 4GB */
1234 	if (!res)
1235 		return 0;
1236 
1237 	/* Limit the BAR size to what is available */
1238 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1239 			rbar_size);
1240 
1241 	/* Disable memory decoding while we change the BAR addresses and size */
1242 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1243 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1244 			      cmd & ~PCI_COMMAND_MEMORY);
1245 
1246 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1247 	amdgpu_device_doorbell_fini(adev);
1248 	if (adev->asic_type >= CHIP_BONAIRE)
1249 		pci_release_resource(adev->pdev, 2);
1250 
1251 	pci_release_resource(adev->pdev, 0);
1252 
1253 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1254 	if (r == -ENOSPC)
1255 		DRM_INFO("Not enough PCI address space for a large BAR.");
1256 	else if (r && r != -ENOTSUPP)
1257 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1258 
1259 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1260 
1261 	/* When the doorbell or fb BAR isn't available we have no chance of
1262 	 * using the device.
1263 	 */
1264 	r = amdgpu_device_doorbell_init(adev);
1265 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1266 		return -ENODEV;
1267 
1268 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1269 
1270 	return 0;
1271 }
1272 
1273 /*
1274  * GPU helpers function.
1275  */
1276 /**
1277  * amdgpu_device_need_post - check if the hw need post or not
1278  *
1279  * @adev: amdgpu_device pointer
1280  *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or if a post is needed because a hw reset was performed.
 * Returns true if a post is needed or false if not.
1284  */
1285 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1286 {
1287 	uint32_t reg;
1288 
1289 	if (amdgpu_sriov_vf(adev))
1290 		return false;
1291 
1292 	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a
		 * VM reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force a vPost for SMC versions below 22.15.
1297 		 */
1298 		if (adev->asic_type == CHIP_FIJI) {
1299 			int err;
1300 			uint32_t fw_ver;
1301 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
1303 			if (err)
1304 				return true;
1305 
1306 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1307 			if (fw_ver < 0x00160e00)
1308 				return true;
1309 		}
1310 	}
1311 
1312 	/* Don't post if we need to reset whole hive on init */
1313 	if (adev->gmc.xgmi.pending_reset)
1314 		return false;
1315 
1316 	if (adev->has_hw_reset) {
1317 		adev->has_hw_reset = false;
1318 		return true;
1319 	}
1320 
1321 	/* bios scratch used on CIK+ */
1322 	if (adev->asic_type >= CHIP_BONAIRE)
1323 		return amdgpu_atombios_scratch_need_asic_init(adev);
1324 
1325 	/* check MEM_SIZE for older asics */
1326 	reg = amdgpu_asic_get_config_memsize(adev);
1327 
1328 	if ((reg != 0) && (reg != 0xffffffff))
1329 		return false;
1330 
1331 	return true;
1332 }
1333 
1334 /**
1335  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1336  *
1337  * @adev: amdgpu_device pointer
1338  *
1339  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1340  * be set for this device.
1341  *
1342  * Returns true if it should be used or false if not.
1343  */
1344 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1345 {
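	/* amdgpu_aspm module parameter: -1 = auto (only use ASPM if the
	 * platform already enabled it on the device), 0 = disabled,
	 * 1 = enabled.
	 */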
1346 	switch (amdgpu_aspm) {
1347 	case -1:
1348 		break;
1349 	case 0:
1350 		return false;
1351 	case 1:
1352 		return true;
1353 	default:
1354 		return false;
1355 	}
1356 	return pcie_aspm_enabled(adev->pdev);
1357 }
1358 
1359 /* if we get transitioned to only one device, take VGA back */
1360 /**
1361  * amdgpu_device_vga_set_decode - enable/disable vga decode
1362  *
1363  * @pdev: PCI device pointer
1364  * @state: enable/disable vga decode
1365  *
1366  * Enable/disable vga decode (all asics).
1367  * Returns VGA resource flags.
1368  */
1369 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1370 		bool state)
1371 {
1372 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1373 	amdgpu_asic_set_vga_state(adev, state);
1374 	if (state)
1375 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1376 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1377 	else
1378 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1379 }
1380 
1381 /**
1382  * amdgpu_device_check_block_size - validate the vm block size
1383  *
1384  * @adev: amdgpu_device pointer
1385  *
1386  * Validates the vm block size specified via module parameter.
1387  * The vm block size defines number of bits in page table versus page directory,
1388  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1389  * page table and the remaining bits are in the page directory.
1390  */
1391 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1392 {
1393 	/* defines number of bits in page table versus page directory,
1394 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1395 	 * page table and the remaining bits are in the page directory */
1396 	if (amdgpu_vm_block_size == -1)
1397 		return;
1398 
1399 	if (amdgpu_vm_block_size < 9) {
1400 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1401 			 amdgpu_vm_block_size);
1402 		amdgpu_vm_block_size = -1;
1403 	}
1404 }
1405 
1406 /**
1407  * amdgpu_device_check_vm_size - validate the vm size
1408  *
1409  * @adev: amdgpu_device pointer
1410  *
1411  * Validates the vm size in GB specified via module parameter.
1412  * The VM size is the size of the GPU virtual memory space in GB.
1413  */
1414 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1415 {
1416 	/* no need to check the default value */
1417 	if (amdgpu_vm_size == -1)
1418 		return;
1419 
1420 	if (amdgpu_vm_size < 1) {
1421 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1422 			 amdgpu_vm_size);
1423 		amdgpu_vm_size = -1;
1424 	}
1425 }
1426 
1427 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1428 {
1429 	struct sysinfo si;
1430 	bool is_os_64 = (sizeof(void *) == 8);
1431 	uint64_t total_memory;
1432 	uint64_t dram_size_seven_GB = 0x1B8000000;
1433 	uint64_t dram_size_three_GB = 0xB8000000;
1434 
1435 	if (amdgpu_smu_memory_pool_size == 0)
1436 		return;
1437 
1438 	if (!is_os_64) {
1439 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1440 		goto def_value;
1441 	}
1442 	si_meminfo(&si);
1443 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1444 
1445 	if ((amdgpu_smu_memory_pool_size == 1) ||
1446 		(amdgpu_smu_memory_pool_size == 2)) {
1447 		if (total_memory < dram_size_three_GB)
1448 			goto def_value1;
1449 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1450 		(amdgpu_smu_memory_pool_size == 8)) {
1451 		if (total_memory < dram_size_seven_GB)
1452 			goto def_value1;
1453 	} else {
1454 		DRM_WARN("Smu memory pool size not supported\n");
1455 		goto def_value;
1456 	}
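	/* the module parameter is in units of 256MB, convert it to bytes */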
1457 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1458 
1459 	return;
1460 
1461 def_value1:
	DRM_WARN("Not enough system memory\n");
1463 def_value:
1464 	adev->pm.smu_prv_buffer_size = 0;
1465 }
1466 
1467 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1468 {
1469 	if (!(adev->flags & AMD_IS_APU) ||
1470 	    adev->asic_type < CHIP_RAVEN)
1471 		return 0;
1472 
1473 	switch (adev->asic_type) {
1474 	case CHIP_RAVEN:
1475 		if (adev->pdev->device == 0x15dd)
1476 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1477 		if (adev->pdev->device == 0x15d8)
1478 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1479 		break;
1480 	case CHIP_RENOIR:
1481 		if ((adev->pdev->device == 0x1636) ||
1482 		    (adev->pdev->device == 0x164c))
1483 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1484 		else
1485 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1486 		break;
1487 	case CHIP_VANGOGH:
1488 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1489 		break;
1490 	case CHIP_YELLOW_CARP:
1491 		break;
1492 	case CHIP_CYAN_SKILLFISH:
1493 		if ((adev->pdev->device == 0x13FE) ||
1494 		    (adev->pdev->device == 0x143F))
1495 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1496 		break;
1497 	default:
1498 		break;
1499 	}
1500 
1501 	return 0;
1502 }
1503 
1504 /**
1505  * amdgpu_device_check_arguments - validate module params
1506  *
1507  * @adev: amdgpu_device pointer
1508  *
1509  * Validates certain module parameters and updates
1510  * the associated values used by the driver (all asics).
1511  */
1512 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1513 {
1514 	if (amdgpu_sched_jobs < 4) {
1515 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1516 			 amdgpu_sched_jobs);
1517 		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1519 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1520 			 amdgpu_sched_jobs);
1521 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1522 	}
1523 
1524 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1525 		/* gart size must be greater or equal to 32M */
1526 		dev_warn(adev->dev, "gart size (%d) too small\n",
1527 			 amdgpu_gart_size);
1528 		amdgpu_gart_size = -1;
1529 	}
1530 
1531 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1532 		/* gtt size must be greater or equal to 32M */
1533 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1534 				 amdgpu_gtt_size);
1535 		amdgpu_gtt_size = -1;
1536 	}
1537 
1538 	/* valid range is between 4 and 9 inclusive */
1539 	if (amdgpu_vm_fragment_size != -1 &&
1540 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1541 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1542 		amdgpu_vm_fragment_size = -1;
1543 	}
1544 
1545 	if (amdgpu_sched_hw_submission < 2) {
1546 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1547 			 amdgpu_sched_hw_submission);
1548 		amdgpu_sched_hw_submission = 2;
1549 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1550 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1551 			 amdgpu_sched_hw_submission);
1552 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1553 	}
1554 
1555 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1556 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1557 		amdgpu_reset_method = -1;
1558 	}
1559 
1560 	amdgpu_device_check_smu_prv_buffer_size(adev);
1561 
1562 	amdgpu_device_check_vm_size(adev);
1563 
1564 	amdgpu_device_check_block_size(adev);
1565 
1566 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1567 
1568 	return 0;
1569 }
1570 
1571 /**
1572  * amdgpu_switcheroo_set_state - set switcheroo state
1573  *
1574  * @pdev: pci dev pointer
1575  * @state: vga_switcheroo state
1576  *
1577  * Callback for the switcheroo driver.  Suspends or resumes
1578  * the asics before or after it is powered up using ACPI methods.
1579  */
1580 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1581 					enum vga_switcheroo_state state)
1582 {
1583 	struct drm_device *dev = pci_get_drvdata(pdev);
1584 	int r;
1585 
1586 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1587 		return;
1588 
1589 	if (state == VGA_SWITCHEROO_ON) {
1590 		pr_info("switched on\n");
1591 		/* don't suspend or resume card normally */
1592 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1593 
1594 		pci_set_power_state(pdev, PCI_D0);
1595 		amdgpu_device_load_pci_state(pdev);
1596 		r = pci_enable_device(pdev);
1597 		if (r)
1598 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1599 		amdgpu_device_resume(dev, true);
1600 
1601 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1602 	} else {
1603 		pr_info("switched off\n");
1604 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1605 		amdgpu_device_suspend(dev, true);
1606 		amdgpu_device_cache_pci_state(pdev);
1607 		/* Shut down the device */
1608 		pci_disable_device(pdev);
1609 		pci_set_power_state(pdev, PCI_D3cold);
1610 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1611 	}
1612 }
1613 
1614 /**
1615  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1616  *
1617  * @pdev: pci dev pointer
1618  *
 * Callback for the switcheroo driver.  Check if the switcheroo
1620  * state can be changed.
1621  * Returns true if the state can be changed, false if not.
1622  */
1623 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1624 {
1625 	struct drm_device *dev = pci_get_drvdata(pdev);
1626 
1627 	/*
1628 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1629 	* locking inversion with the driver load path. And the access here is
1630 	* completely racy anyway. So don't bother with locking for now.
1631 	*/
1632 	return atomic_read(&dev->open_count) == 0;
1633 }
1634 
1635 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1636 	.set_gpu_state = amdgpu_switcheroo_set_state,
1637 	.reprobe = NULL,
1638 	.can_switch = amdgpu_switcheroo_can_switch,
1639 };
1640 
1641 /**
1642  * amdgpu_device_ip_set_clockgating_state - set the CG state
1643  *
1644  * @dev: amdgpu_device pointer
1645  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1646  * @state: clockgating state (gate or ungate)
1647  *
1648  * Sets the requested clockgating state for all instances of
1649  * the hardware IP specified.
1650  * Returns the error code from the last instance.
1651  */
1652 int amdgpu_device_ip_set_clockgating_state(void *dev,
1653 					   enum amd_ip_block_type block_type,
1654 					   enum amd_clockgating_state state)
1655 {
1656 	struct amdgpu_device *adev = dev;
1657 	int i, r = 0;
1658 
1659 	for (i = 0; i < adev->num_ip_blocks; i++) {
1660 		if (!adev->ip_blocks[i].status.valid)
1661 			continue;
1662 		if (adev->ip_blocks[i].version->type != block_type)
1663 			continue;
1664 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1665 			continue;
1666 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1667 			(void *)adev, state);
1668 		if (r)
1669 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1670 				  adev->ip_blocks[i].version->funcs->name, r);
1671 	}
1672 	return r;
1673 }
1674 
1675 /**
1676  * amdgpu_device_ip_set_powergating_state - set the PG state
1677  *
1678  * @dev: amdgpu_device pointer
1679  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1680  * @state: powergating state (gate or ungate)
1681  *
1682  * Sets the requested powergating state for all instances of
1683  * the hardware IP specified.
1684  * Returns the error code from the last instance.
1685  */
1686 int amdgpu_device_ip_set_powergating_state(void *dev,
1687 					   enum amd_ip_block_type block_type,
1688 					   enum amd_powergating_state state)
1689 {
1690 	struct amdgpu_device *adev = dev;
1691 	int i, r = 0;
1692 
1693 	for (i = 0; i < adev->num_ip_blocks; i++) {
1694 		if (!adev->ip_blocks[i].status.valid)
1695 			continue;
1696 		if (adev->ip_blocks[i].version->type != block_type)
1697 			continue;
1698 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1699 			continue;
1700 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1701 			(void *)adev, state);
1702 		if (r)
1703 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1704 				  adev->ip_blocks[i].version->funcs->name, r);
1705 	}
1706 	return r;
1707 }
1708 
1709 /**
1710  * amdgpu_device_ip_get_clockgating_state - get the CG state
1711  *
1712  * @adev: amdgpu_device pointer
1713  * @flags: clockgating feature flags
1714  *
1715  * Walks the list of IPs on the device and updates the clockgating
1716  * flags for each IP.
1717  * Updates @flags with the feature flags for each hardware IP where
1718  * clockgating is enabled.
1719  */
1720 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1721 					    u64 *flags)
1722 {
1723 	int i;
1724 
1725 	for (i = 0; i < adev->num_ip_blocks; i++) {
1726 		if (!adev->ip_blocks[i].status.valid)
1727 			continue;
1728 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1729 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1730 	}
1731 }
1732 
1733 /**
1734  * amdgpu_device_ip_wait_for_idle - wait for idle
1735  *
1736  * @adev: amdgpu_device pointer
1737  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1738  *
 * Waits for the requested hardware IP to be idle.
1740  * Returns 0 for success or a negative error code on failure.
1741  */
1742 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1743 				   enum amd_ip_block_type block_type)
1744 {
1745 	int i, r;
1746 
1747 	for (i = 0; i < adev->num_ip_blocks; i++) {
1748 		if (!adev->ip_blocks[i].status.valid)
1749 			continue;
1750 		if (adev->ip_blocks[i].version->type == block_type) {
1751 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1752 			if (r)
1753 				return r;
1754 			break;
1755 		}
1756 	}
1757 	return 0;
1758 
1759 }
1760 
1761 /**
1762  * amdgpu_device_ip_is_idle - is the hardware IP idle
1763  *
1764  * @adev: amdgpu_device pointer
1765  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1766  *
1767  * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1769  */
1770 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1771 			      enum amd_ip_block_type block_type)
1772 {
1773 	int i;
1774 
1775 	for (i = 0; i < adev->num_ip_blocks; i++) {
1776 		if (!adev->ip_blocks[i].status.valid)
1777 			continue;
1778 		if (adev->ip_blocks[i].version->type == block_type)
1779 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1780 	}
1781 	return true;
1782 
1783 }
1784 
1785 /**
1786  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1787  *
1788  * @adev: amdgpu_device pointer
1789  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1790  *
1791  * Returns a pointer to the hardware IP block structure
1792  * if it exists for the asic, otherwise NULL.
1793  */
1794 struct amdgpu_ip_block *
1795 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1796 			      enum amd_ip_block_type type)
1797 {
1798 	int i;
1799 
1800 	for (i = 0; i < adev->num_ip_blocks; i++)
1801 		if (adev->ip_blocks[i].version->type == type)
1802 			return &adev->ip_blocks[i];
1803 
1804 	return NULL;
1805 }
1806 
1807 /**
1808  * amdgpu_device_ip_block_version_cmp
1809  *
1810  * @adev: amdgpu_device pointer
1811  * @type: enum amd_ip_block_type
1812  * @major: major version
1813  * @minor: minor version
1814  *
 * Returns 0 if the installed IP block version is equal to or greater than the
 * requested version, 1 if it is smaller or the ip_block doesn't exist.
1817  */
1818 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1819 				       enum amd_ip_block_type type,
1820 				       u32 major, u32 minor)
1821 {
1822 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1823 
1824 	if (ip_block && ((ip_block->version->major > major) ||
1825 			((ip_block->version->major == major) &&
1826 			(ip_block->version->minor >= minor))))
1827 		return 0;
1828 
1829 	return 1;
1830 }
1831 
1832 /**
1833  * amdgpu_device_ip_block_add
1834  *
1835  * @adev: amdgpu_device pointer
1836  * @ip_block_version: pointer to the IP to add
1837  *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 * Returns 0 on success, -EINVAL if @ip_block_version is NULL.
1840  */
1841 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1842 			       const struct amdgpu_ip_block_version *ip_block_version)
1843 {
1844 	if (!ip_block_version)
1845 		return -EINVAL;
1846 
1847 	switch (ip_block_version->type) {
1848 	case AMD_IP_BLOCK_TYPE_VCN:
1849 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1850 			return 0;
1851 		break;
1852 	case AMD_IP_BLOCK_TYPE_JPEG:
1853 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1854 			return 0;
1855 		break;
1856 	default:
1857 		break;
1858 	}
1859 
1860 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1861 		  ip_block_version->funcs->name);
1862 
1863 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1864 
1865 	return 0;
1866 }
1867 
1868 /**
1869  * amdgpu_device_enable_virtual_display - enable virtual display feature
1870  *
1871  * @adev: amdgpu_device pointer
1872  *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
1879  */
1880 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1881 {
1882 	adev->enable_virtual_display = false;
1883 
1884 	if (amdgpu_virtual_display) {
1885 		const char *pci_address_name = pci_name(adev->pdev);
1886 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1887 
1888 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1889 		pciaddstr_tmp = pciaddstr;
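		/*
		 * The option is a ';'-separated list of "<pci bus id>[,<num crtcs>]"
		 * entries; "all" matches every device.
		 */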
1890 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1891 			pciaddname = strsep(&pciaddname_tmp, ",");
1892 			if (!strcmp("all", pciaddname)
1893 			    || !strcmp(pci_address_name, pciaddname)) {
1894 				long num_crtc;
1895 				int res = -1;
1896 
1897 				adev->enable_virtual_display = true;
1898 
1899 				if (pciaddname_tmp)
1900 					res = kstrtol(pciaddname_tmp, 10,
1901 						      &num_crtc);
1902 
1903 				if (!res) {
1904 					if (num_crtc < 1)
1905 						num_crtc = 1;
1906 					if (num_crtc > 6)
1907 						num_crtc = 6;
1908 					adev->mode_info.num_crtc = num_crtc;
1909 				} else {
1910 					adev->mode_info.num_crtc = 1;
1911 				}
1912 				break;
1913 			}
1914 		}
1915 
1916 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1917 			 amdgpu_virtual_display, pci_address_name,
1918 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1919 
1920 		kfree(pciaddstr);
1921 	}
1922 }
1923 
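/**
 * amdgpu_device_set_sriov_virtual_display - enable virtual display for SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * On SR-IOV VFs that do not already have virtual display enabled, enable it
 * with a single virtual crtc.
 */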
1924 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1925 {
1926 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1927 		adev->mode_info.num_crtc = 1;
1928 		adev->enable_virtual_display = true;
1929 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1930 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1931 	}
1932 }
1933 
1934 /**
1935  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1936  *
1937  * @adev: amdgpu_device pointer
1938  *
1939  * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1941  * the asic.
1942  * Returns 0 on success, -EINVAL on failure.
1943  */
1944 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1945 {
1946 	const char *chip_name;
1947 	char fw_name[40];
1948 	int err;
1949 	const struct gpu_info_firmware_header_v1_0 *hdr;
1950 
1951 	adev->firmware.gpu_info_fw = NULL;
1952 
1953 	if (adev->mman.discovery_bin) {
1954 		/*
1955 		 * FIXME: The bounding box is still needed by Navi12, so
1956 		 * temporarily read it from gpu_info firmware. Should be dropped
1957 		 * when DAL no longer needs it.
1958 		 */
1959 		if (adev->asic_type != CHIP_NAVI12)
1960 			return 0;
1961 	}
1962 
1963 	switch (adev->asic_type) {
1964 	default:
1965 		return 0;
1966 	case CHIP_VEGA10:
1967 		chip_name = "vega10";
1968 		break;
1969 	case CHIP_VEGA12:
1970 		chip_name = "vega12";
1971 		break;
1972 	case CHIP_RAVEN:
1973 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1974 			chip_name = "raven2";
1975 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1976 			chip_name = "picasso";
1977 		else
1978 			chip_name = "raven";
1979 		break;
1980 	case CHIP_ARCTURUS:
1981 		chip_name = "arcturus";
1982 		break;
1983 	case CHIP_NAVI12:
1984 		chip_name = "navi12";
1985 		break;
1986 	}
1987 
1988 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1989 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
1990 	if (err) {
1991 		dev_err(adev->dev,
1992 			"Failed to get gpu_info firmware \"%s\"\n",
1993 			fw_name);
1994 		goto out;
1995 	}
1996 
1997 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1998 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1999 
2000 	switch (hdr->version_major) {
2001 	case 1:
2002 	{
2003 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2004 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2005 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2006 
2007 		/*
		 * Should be dropped when DAL no longer needs it.
2009 		 */
2010 		if (adev->asic_type == CHIP_NAVI12)
2011 			goto parse_soc_bounding_box;
2012 
2013 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2014 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2015 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2016 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2017 		adev->gfx.config.max_texture_channel_caches =
2018 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2019 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2020 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2021 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2022 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2023 		adev->gfx.config.double_offchip_lds_buf =
2024 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2025 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2026 		adev->gfx.cu_info.max_waves_per_simd =
2027 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2028 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2029 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2030 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2031 		if (hdr->version_minor >= 1) {
2032 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2033 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2034 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2035 			adev->gfx.config.num_sc_per_sh =
2036 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2037 			adev->gfx.config.num_packer_per_sc =
2038 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2039 		}
2040 
2041 parse_soc_bounding_box:
2042 		/*
		 * soc bounding box info is not integrated into the discovery table,
		 * so we always need to parse it from the gpu_info firmware when needed.
2045 		 */
2046 		if (hdr->version_minor == 2) {
2047 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2048 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2049 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2050 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2051 		}
2052 		break;
2053 	}
2054 	default:
2055 		dev_err(adev->dev,
2056 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2057 		err = -EINVAL;
2058 		goto out;
2059 	}
2060 out:
2061 	return err;
2062 }
2063 
2064 /**
2065  * amdgpu_device_ip_early_init - run early init for hardware IPs
2066  *
2067  * @adev: amdgpu_device pointer
2068  *
2069  * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
2071  * is the first stage in initializing the asic.
2072  * Returns 0 on success, negative error code on failure.
2073  */
2074 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2075 {
2076 	struct drm_device *dev = adev_to_drm(adev);
2077 	struct pci_dev *parent;
2078 	int i, r;
2079 	bool total;
2080 
2081 	amdgpu_device_enable_virtual_display(adev);
2082 
2083 	if (amdgpu_sriov_vf(adev)) {
2084 		r = amdgpu_virt_request_full_gpu(adev, true);
2085 		if (r)
2086 			return r;
2087 	}
2088 
2089 	switch (adev->asic_type) {
2090 #ifdef CONFIG_DRM_AMDGPU_SI
2091 	case CHIP_VERDE:
2092 	case CHIP_TAHITI:
2093 	case CHIP_PITCAIRN:
2094 	case CHIP_OLAND:
2095 	case CHIP_HAINAN:
2096 		adev->family = AMDGPU_FAMILY_SI;
2097 		r = si_set_ip_blocks(adev);
2098 		if (r)
2099 			return r;
2100 		break;
2101 #endif
2102 #ifdef CONFIG_DRM_AMDGPU_CIK
2103 	case CHIP_BONAIRE:
2104 	case CHIP_HAWAII:
2105 	case CHIP_KAVERI:
2106 	case CHIP_KABINI:
2107 	case CHIP_MULLINS:
2108 		if (adev->flags & AMD_IS_APU)
2109 			adev->family = AMDGPU_FAMILY_KV;
2110 		else
2111 			adev->family = AMDGPU_FAMILY_CI;
2112 
2113 		r = cik_set_ip_blocks(adev);
2114 		if (r)
2115 			return r;
2116 		break;
2117 #endif
2118 	case CHIP_TOPAZ:
2119 	case CHIP_TONGA:
2120 	case CHIP_FIJI:
2121 	case CHIP_POLARIS10:
2122 	case CHIP_POLARIS11:
2123 	case CHIP_POLARIS12:
2124 	case CHIP_VEGAM:
2125 	case CHIP_CARRIZO:
2126 	case CHIP_STONEY:
2127 		if (adev->flags & AMD_IS_APU)
2128 			adev->family = AMDGPU_FAMILY_CZ;
2129 		else
2130 			adev->family = AMDGPU_FAMILY_VI;
2131 
2132 		r = vi_set_ip_blocks(adev);
2133 		if (r)
2134 			return r;
2135 		break;
2136 	default:
2137 		r = amdgpu_discovery_set_ip_blocks(adev);
2138 		if (r)
2139 			return r;
2140 		break;
2141 	}
2142 
2143 	if (amdgpu_has_atpx() &&
2144 	    (amdgpu_is_atpx_hybrid() ||
2145 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2146 	    ((adev->flags & AMD_IS_APU) == 0) &&
2147 	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2148 		adev->flags |= AMD_IS_PX;
2149 
2150 	if (!(adev->flags & AMD_IS_APU)) {
2151 		parent = pci_upstream_bridge(adev->pdev);
2152 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2153 	}
2154 
2155 	amdgpu_amdkfd_device_probe(adev);
2156 
2157 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2158 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2159 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2160 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2161 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2162 
2163 	total = true;
2164 	for (i = 0; i < adev->num_ip_blocks; i++) {
2165 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2166 			DRM_ERROR("disabled ip block: %d <%s>\n",
2167 				  i, adev->ip_blocks[i].version->funcs->name);
2168 			adev->ip_blocks[i].status.valid = false;
2169 		} else {
2170 			if (adev->ip_blocks[i].version->funcs->early_init) {
2171 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
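				/*
				 * -ENOENT means the IP block is not present
				 * on this ASIC; mark it invalid without
				 * failing init.
				 */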
2172 				if (r == -ENOENT) {
2173 					adev->ip_blocks[i].status.valid = false;
2174 				} else if (r) {
2175 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2176 						  adev->ip_blocks[i].version->funcs->name, r);
2177 					total = false;
2178 				} else {
2179 					adev->ip_blocks[i].status.valid = true;
2180 				}
2181 			} else {
2182 				adev->ip_blocks[i].status.valid = true;
2183 			}
2184 		}
2185 		/* get the vbios after the asic_funcs are set up */
2186 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2187 			r = amdgpu_device_parse_gpu_info_fw(adev);
2188 			if (r)
2189 				return r;
2190 
2191 			/* Read BIOS */
2192 			if (!amdgpu_get_bios(adev))
2193 				return -EINVAL;
2194 
2195 			r = amdgpu_atombios_init(adev);
2196 			if (r) {
2197 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2198 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2199 				return r;
2200 			}
2201 
			/* get pf2vf msg info at its earliest time */
2203 			if (amdgpu_sriov_vf(adev))
2204 				amdgpu_virt_init_data_exchange(adev);
2205 
2206 		}
2207 	}
2208 	if (!total)
2209 		return -ENODEV;
2210 
2211 	adev->cg_flags &= amdgpu_cg_mask;
2212 	adev->pg_flags &= amdgpu_pg_mask;
2213 
2214 	return 0;
2215 }
2216 
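/**
 * amdgpu_device_ip_hw_init_phase1 - run hw_init for early hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First hardware init pass: bring up the blocks that are needed before
 * firmware loading (COMMON, IH, and PSP on SR-IOV).
 * Returns 0 on success, negative error code on failure.
 */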
2217 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2218 {
2219 	int i, r;
2220 
2221 	for (i = 0; i < adev->num_ip_blocks; i++) {
2222 		if (!adev->ip_blocks[i].status.sw)
2223 			continue;
2224 		if (adev->ip_blocks[i].status.hw)
2225 			continue;
2226 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2227 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2228 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2229 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2230 			if (r) {
2231 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2232 					  adev->ip_blocks[i].version->funcs->name, r);
2233 				return r;
2234 			}
2235 			adev->ip_blocks[i].status.hw = true;
2236 		}
2237 	}
2238 
2239 	return 0;
2240 }
2241 
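/**
 * amdgpu_device_ip_hw_init_phase2 - run hw_init for the remaining hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second hardware init pass: run hw_init for every block that has completed
 * sw_init but has not been brought up yet.
 * Returns 0 on success, negative error code on failure.
 */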
2242 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2243 {
2244 	int i, r;
2245 
2246 	for (i = 0; i < adev->num_ip_blocks; i++) {
2247 		if (!adev->ip_blocks[i].status.sw)
2248 			continue;
2249 		if (adev->ip_blocks[i].status.hw)
2250 			continue;
2251 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2252 		if (r) {
2253 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2254 				  adev->ip_blocks[i].version->funcs->name, r);
2255 			return r;
2256 		}
2257 		adev->ip_blocks[i].status.hw = true;
2258 	}
2259 
2260 	return 0;
2261 }
2262 
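/**
 * amdgpu_device_fw_loading - load the firmware needed by the IP blocks
 *
 * @adev: amdgpu_device pointer
 *
 * For ASICs with a PSP block, bring it up (hw_init on first init, resume on
 * the reset and suspend paths), then load the SMU firmware.
 * Returns 0 on success, negative error code on failure.
 */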
2263 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2264 {
2265 	int r = 0;
2266 	int i;
2267 	uint32_t smu_version;
2268 
2269 	if (adev->asic_type >= CHIP_VEGA10) {
2270 		for (i = 0; i < adev->num_ip_blocks; i++) {
2271 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2272 				continue;
2273 
2274 			if (!adev->ip_blocks[i].status.sw)
2275 				continue;
2276 
			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw)
2279 				break;
2280 
2281 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2282 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2283 				if (r) {
2284 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2285 							  adev->ip_blocks[i].version->funcs->name, r);
2286 					return r;
2287 				}
2288 			} else {
2289 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2290 				if (r) {
2291 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2292 							  adev->ip_blocks[i].version->funcs->name, r);
2293 					return r;
2294 				}
2295 			}
2296 
2297 			adev->ip_blocks[i].status.hw = true;
2298 			break;
2299 		}
2300 	}
2301 
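	/* SMU firmware loading is skipped for SR-IOV VFs, except on Tonga */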
2302 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2303 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2304 
2305 	return r;
2306 }
2307 
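/**
 * amdgpu_device_init_schedulers - init the DRM GPU schedulers
 *
 * @adev: amdgpu_device pointer
 *
 * Creates a DRM GPU scheduler for every ring that needs one, using the
 * lockup timeout that matches the ring type.
 * Returns 0 on success, negative error code on failure.
 */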
2308 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2309 {
2310 	long timeout;
2311 	int r, i;
2312 
2313 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2314 		struct amdgpu_ring *ring = adev->rings[i];
2315 
		/* No need to set up the GPU scheduler for rings that don't need it */
2317 		if (!ring || ring->no_scheduler)
2318 			continue;
2319 
2320 		switch (ring->funcs->type) {
2321 		case AMDGPU_RING_TYPE_GFX:
2322 			timeout = adev->gfx_timeout;
2323 			break;
2324 		case AMDGPU_RING_TYPE_COMPUTE:
2325 			timeout = adev->compute_timeout;
2326 			break;
2327 		case AMDGPU_RING_TYPE_SDMA:
2328 			timeout = adev->sdma_timeout;
2329 			break;
2330 		default:
2331 			timeout = adev->video_timeout;
2332 			break;
2333 		}
2334 
2335 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2336 				   ring->num_hw_submission, amdgpu_job_hang_limit,
2337 				   timeout, adev->reset_domain->wq,
2338 				   ring->sched_score, ring->name,
2339 				   adev->dev);
2340 		if (r) {
2341 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2342 				  ring->name);
2343 			return r;
2344 		}
2345 	}
2346 
2347 	return 0;
2348 }
2349 
2350 
2351 /**
2352  * amdgpu_device_ip_init - run init for hardware IPs
2353  *
2354  * @adev: amdgpu_device pointer
2355  *
2356  * Main initialization pass for hardware IPs.  The list of all the hardware
2357  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2358  * are run.  sw_init initializes the software state associated with each IP
2359  * and hw_init initializes the hardware associated with each IP.
2360  * Returns 0 on success, negative error code on failure.
2361  */
2362 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2363 {
2364 	int i, r;
2365 
2366 	r = amdgpu_ras_init(adev);
2367 	if (r)
2368 		return r;
2369 
2370 	for (i = 0; i < adev->num_ip_blocks; i++) {
2371 		if (!adev->ip_blocks[i].status.valid)
2372 			continue;
2373 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2374 		if (r) {
2375 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2376 				  adev->ip_blocks[i].version->funcs->name, r);
2377 			goto init_failed;
2378 		}
2379 		adev->ip_blocks[i].status.sw = true;
2380 
2381 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2382 			/* need to do common hw init early so everything is set up for gmc */
2383 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2384 			if (r) {
2385 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2386 				goto init_failed;
2387 			}
2388 			adev->ip_blocks[i].status.hw = true;
2389 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2390 			/* need to do gmc hw init early so we can allocate gpu mem */
2391 			/* Try to reserve bad pages early */
2392 			if (amdgpu_sriov_vf(adev))
2393 				amdgpu_virt_exchange_data(adev);
2394 
2395 			r = amdgpu_device_mem_scratch_init(adev);
2396 			if (r) {
2397 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2398 				goto init_failed;
2399 			}
2400 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2401 			if (r) {
2402 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2403 				goto init_failed;
2404 			}
2405 			r = amdgpu_device_wb_init(adev);
2406 			if (r) {
2407 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2408 				goto init_failed;
2409 			}
2410 			adev->ip_blocks[i].status.hw = true;
2411 
2412 			/* right after GMC hw init, we create CSA */
2413 			if (amdgpu_mcbp) {
2414 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2415 							       AMDGPU_GEM_DOMAIN_VRAM |
2416 							       AMDGPU_GEM_DOMAIN_GTT,
2417 							       AMDGPU_CSA_SIZE);
2418 				if (r) {
2419 					DRM_ERROR("allocate CSA failed %d\n", r);
2420 					goto init_failed;
2421 				}
2422 			}
2423 		}
2424 	}
2425 
2426 	if (amdgpu_sriov_vf(adev))
2427 		amdgpu_virt_init_data_exchange(adev);
2428 
2429 	r = amdgpu_ib_pool_init(adev);
2430 	if (r) {
2431 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2432 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2433 		goto init_failed;
2434 	}
2435 
	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init completes */
2437 	if (r)
2438 		goto init_failed;
2439 
2440 	r = amdgpu_device_ip_hw_init_phase1(adev);
2441 	if (r)
2442 		goto init_failed;
2443 
2444 	r = amdgpu_device_fw_loading(adev);
2445 	if (r)
2446 		goto init_failed;
2447 
2448 	r = amdgpu_device_ip_hw_init_phase2(adev);
2449 	if (r)
2450 		goto init_failed;
2451 
	/*
	 * Retired pages will be loaded from eeprom and reserved here.
	 * This should be called after amdgpu_device_ip_hw_init_phase2 since,
	 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
	 * functional for I2C communication, which is only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the caller only cares about
	 * failures caused by a bad GPU situation and stops the amdgpu init
	 * process accordingly.  For other failures, it still releases all
	 * the resources and prints an error message rather than returning a
	 * negative value to the upper level.
	 *
	 * Note: theoretically, this should be called before all VRAM
	 * allocations to protect retired pages from being abused.
	 */
2467 	r = amdgpu_ras_recovery_init(adev);
2468 	if (r)
2469 		goto init_failed;
2470 
	/*
	 * In case of XGMI, grab an extra reference on the reset domain for
	 * this device.
	 */
2474 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2475 		if (amdgpu_xgmi_add_device(adev) == 0) {
2476 			if (!amdgpu_sriov_vf(adev)) {
2477 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2478 
2479 				if (WARN_ON(!hive)) {
2480 					r = -ENOENT;
2481 					goto init_failed;
2482 				}
2483 
2484 				if (!hive->reset_domain ||
2485 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2486 					r = -ENOENT;
2487 					amdgpu_put_xgmi_hive(hive);
2488 					goto init_failed;
2489 				}
2490 
2491 				/* Drop the early temporary reset domain we created for device */
2492 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2493 				adev->reset_domain = hive->reset_domain;
2494 				amdgpu_put_xgmi_hive(hive);
2495 			}
2496 		}
2497 	}
2498 
2499 	r = amdgpu_device_init_schedulers(adev);
2500 	if (r)
2501 		goto init_failed;
2502 
	/* Don't init kfd if the whole hive needs to be reset during init */
2504 	if (!adev->gmc.xgmi.pending_reset)
2505 		amdgpu_amdkfd_device_init(adev);
2506 
2507 	amdgpu_fru_get_product_info(adev);
2508 
2509 init_failed:
2510 	if (amdgpu_sriov_vf(adev))
2511 		amdgpu_virt_release_full_gpu(adev, true);
2512 
2513 	return r;
2514 }
2515 
2516 /**
2517  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2518  *
2519  * @adev: amdgpu_device pointer
2520  *
2521  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2522  * this function before a GPU reset.  If the value is retained after a
 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2524  */
2525 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2526 {
2527 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2528 }
2529 
2530 /**
2531  * amdgpu_device_check_vram_lost - check if vram is valid
2532  *
2533  * @adev: amdgpu_device pointer
2534  *
2535  * Checks the reset magic value written to the gart pointer in VRAM.
2536  * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
2538  * returns true if vram is lost, false if not.
2539  */
2540 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2541 {
2542 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2543 			AMDGPU_RESET_MAGIC_NUM))
2544 		return true;
2545 
2546 	if (!amdgpu_in_reset(adev))
2547 		return false;
2548 
2549 	/*
2550 	 * For all ASICs with baco/mode1 reset, the VRAM is
2551 	 * always assumed to be lost.
2552 	 */
2553 	switch (amdgpu_asic_reset_method(adev)) {
2554 	case AMD_RESET_METHOD_BACO:
2555 	case AMD_RESET_METHOD_MODE1:
2556 		return true;
2557 	default:
2558 		return false;
2559 	}
2560 }
2561 
2562 /**
2563  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2564  *
2565  * @adev: amdgpu_device pointer
2566  * @state: clockgating state (gate or ungate)
2567  *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.  This is used during late init
 * to enable clockgating, and during fini or suspend to disable it.
 * Returns 0 on success, negative error code on failure.
 */
2575 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2576 			       enum amd_clockgating_state state)
2577 {
2578 	int i, j, r;
2579 
2580 	if (amdgpu_emu_mode == 1)
2581 		return 0;
2582 
2583 	for (j = 0; j < adev->num_ip_blocks; j++) {
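		/* Walk the IP list forward when gating, in reverse when ungating */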
2584 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2585 		if (!adev->ip_blocks[i].status.late_initialized)
2586 			continue;
2587 		/* skip CG for GFX, SDMA on S0ix */
2588 		if (adev->in_s0ix &&
2589 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2590 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2591 			continue;
2592 		/* skip CG for VCE/UVD, it's handled specially */
2593 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2594 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2595 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2596 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2597 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2598 			/* enable clockgating to save power */
2599 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2600 										     state);
2601 			if (r) {
				DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2603 					  adev->ip_blocks[i].version->funcs->name, r);
2604 				return r;
2605 			}
2606 		}
2607 	}
2608 
2609 	return 0;
2610 }
2611 
2612 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2613 			       enum amd_powergating_state state)
2614 {
2615 	int i, j, r;
2616 
2617 	if (amdgpu_emu_mode == 1)
2618 		return 0;
2619 
2620 	for (j = 0; j < adev->num_ip_blocks; j++) {
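		/* Gate in forward order, ungate in reverse order */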
2621 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2622 		if (!adev->ip_blocks[i].status.late_initialized)
2623 			continue;
2624 		/* skip PG for GFX, SDMA on S0ix */
2625 		if (adev->in_s0ix &&
2626 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2627 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2628 			continue;
		/* skip PG for VCE/UVD, it's handled specially */
2630 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2631 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2632 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2633 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2634 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2635 			/* enable powergating to save power */
2636 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2637 											state);
2638 			if (r) {
				DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2640 					  adev->ip_blocks[i].version->funcs->name, r);
2641 				return r;
2642 			}
2643 		}
2644 	}
2645 	return 0;
2646 }
2647 
2648 static int amdgpu_device_enable_mgpu_fan_boost(void)
2649 {
2650 	struct amdgpu_gpu_instance *gpu_ins;
2651 	struct amdgpu_device *adev;
2652 	int i, ret = 0;
2653 
2654 	mutex_lock(&mgpu_info.mutex);
2655 
2656 	/*
2657 	 * MGPU fan boost feature should be enabled
2658 	 * only when there are two or more dGPUs in
2659 	 * the system
2660 	 */
2661 	if (mgpu_info.num_dgpu < 2)
2662 		goto out;
2663 
2664 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2665 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2666 		adev = gpu_ins->adev;
2667 		if (!(adev->flags & AMD_IS_APU) &&
2668 		    !gpu_ins->mgpu_fan_enabled) {
2669 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2670 			if (ret)
2671 				break;
2672 
2673 			gpu_ins->mgpu_fan_enabled = 1;
2674 		}
2675 	}
2676 
2677 out:
2678 	mutex_unlock(&mgpu_info.mutex);
2679 
2680 	return ret;
2681 }
2682 
2683 /**
2684  * amdgpu_device_ip_late_init - run late init for hardware IPs
2685  *
2686  * @adev: amdgpu_device pointer
2687  *
2688  * Late initialization pass for hardware IPs.  The list of all the hardware
2689  * IPs that make up the asic is walked and the late_init callbacks are run.
2690  * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
2692  * late in the init process.
2693  * Returns 0 on success, negative error code on failure.
2694  */
2695 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2696 {
2697 	struct amdgpu_gpu_instance *gpu_instance;
2698 	int i = 0, r;
2699 
2700 	for (i = 0; i < adev->num_ip_blocks; i++) {
2701 		if (!adev->ip_blocks[i].status.hw)
2702 			continue;
2703 		if (adev->ip_blocks[i].version->funcs->late_init) {
2704 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2705 			if (r) {
2706 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2707 					  adev->ip_blocks[i].version->funcs->name, r);
2708 				return r;
2709 			}
2710 		}
2711 		adev->ip_blocks[i].status.late_initialized = true;
2712 	}
2713 
2714 	r = amdgpu_ras_late_init(adev);
2715 	if (r) {
2716 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2717 		return r;
2718 	}
2719 
2720 	amdgpu_ras_set_error_query_ready(adev, true);
2721 
2722 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2723 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2724 
2725 	amdgpu_device_fill_reset_magic(adev);
2726 
2727 	r = amdgpu_device_enable_mgpu_fan_boost();
2728 	if (r)
2729 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2730 
	/* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
	if (amdgpu_passthrough(adev) &&
	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
	     adev->asic_type == CHIP_ALDEBARAN))
2734 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2735 
2736 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2737 		mutex_lock(&mgpu_info.mutex);
2738 
2739 		/*
		 * Reset the device p-state to low, as it was booted with high.
		 *
		 * This should be performed only after all devices from the same
		 * hive have been initialized.
		 *
		 * However, the number of devices in the hive is not known in
		 * advance, as they are counted one by one during device init.
		 *
		 * So we wait for all XGMI interlinked devices to be initialized.
		 * This may add some delay, as those devices may come from
		 * different hives.  But that should be OK.
2751 		 */
2752 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2753 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2754 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2755 				if (gpu_instance->adev->flags & AMD_IS_APU)
2756 					continue;
2757 
2758 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2759 						AMDGPU_XGMI_PSTATE_MIN);
2760 				if (r) {
2761 					DRM_ERROR("pstate setting failed (%d).\n", r);
2762 					break;
2763 				}
2764 			}
2765 		}
2766 
2767 		mutex_unlock(&mgpu_info.mutex);
2768 	}
2769 
2770 	return 0;
2771 }
2772 
2773 /**
2774  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2775  *
2776  * @adev: amdgpu_device pointer
2777  *
 * For ASICs that need to disable the SMC first.
2779  */
2780 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2781 {
2782 	int i, r;
2783 
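	/* Only ASICs up to GC 9.0.0 need the SMC brought down ahead of the other blocks */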
2784 	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2785 		return;
2786 
2787 	for (i = 0; i < adev->num_ip_blocks; i++) {
2788 		if (!adev->ip_blocks[i].status.hw)
2789 			continue;
2790 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2791 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2792 			/* XXX handle errors */
2793 			if (r) {
2794 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2795 					  adev->ip_blocks[i].version->funcs->name, r);
2796 			}
2797 			adev->ip_blocks[i].status.hw = false;
2798 			break;
2799 		}
2800 	}
2801 }
2802 
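/**
 * amdgpu_device_ip_fini_early - run early fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early teardown pass: runs the early_fini callbacks, ungates clockgating and
 * powergating, suspends KFD and then runs hw_fini for all hardware IPs in
 * reverse order.
 * Returns 0 (errors from the individual callbacks are only logged).
 */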
2803 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2804 {
2805 	int i, r;
2806 
2807 	for (i = 0; i < adev->num_ip_blocks; i++) {
2808 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2809 			continue;
2810 
2811 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2812 		if (r) {
2813 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2814 				  adev->ip_blocks[i].version->funcs->name, r);
2815 		}
2816 	}
2817 
2818 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2819 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2820 
2821 	amdgpu_amdkfd_suspend(adev, false);
2822 
	/* Workaround for ASICs that need to disable the SMC first */
2824 	amdgpu_device_smu_fini_early(adev);
2825 
2826 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2827 		if (!adev->ip_blocks[i].status.hw)
2828 			continue;
2829 
2830 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2831 		/* XXX handle errors */
2832 		if (r) {
2833 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2834 				  adev->ip_blocks[i].version->funcs->name, r);
2835 		}
2836 
2837 		adev->ip_blocks[i].status.hw = false;
2838 	}
2839 
2840 	if (amdgpu_sriov_vf(adev)) {
2841 		if (amdgpu_virt_release_full_gpu(adev, false))
2842 			DRM_ERROR("failed to release exclusive mode on fini\n");
2843 	}
2844 
2845 	return 0;
2846 }
2847 
2848 /**
2849  * amdgpu_device_ip_fini - run fini for hardware IPs
2850  *
2851  * @adev: amdgpu_device pointer
2852  *
2853  * Main teardown pass for hardware IPs.  The list of all the hardware
2854  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2855  * are run.  hw_fini tears down the hardware associated with each IP
2856  * and sw_fini tears down any software state associated with each IP.
2857  * Returns 0 on success, negative error code on failure.
2858  */
2859 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2860 {
2861 	int i, r;
2862 
2863 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2864 		amdgpu_virt_release_ras_err_handler_data(adev);
2865 
2866 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2867 		amdgpu_xgmi_remove_device(adev);
2868 
2869 	amdgpu_amdkfd_device_fini_sw(adev);
2870 
2871 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2872 		if (!adev->ip_blocks[i].status.sw)
2873 			continue;
2874 
2875 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2876 			amdgpu_ucode_free_bo(adev);
2877 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2878 			amdgpu_device_wb_fini(adev);
2879 			amdgpu_device_mem_scratch_fini(adev);
2880 			amdgpu_ib_pool_fini(adev);
2881 		}
2882 
2883 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2884 		/* XXX handle errors */
2885 		if (r) {
2886 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2887 				  adev->ip_blocks[i].version->funcs->name, r);
2888 		}
2889 		adev->ip_blocks[i].status.sw = false;
2890 		adev->ip_blocks[i].status.valid = false;
2891 	}
2892 
2893 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2894 		if (!adev->ip_blocks[i].status.late_initialized)
2895 			continue;
2896 		if (adev->ip_blocks[i].version->funcs->late_fini)
2897 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2898 		adev->ip_blocks[i].status.late_initialized = false;
2899 	}
2900 
2901 	amdgpu_ras_fini(adev);
2902 
2903 	return 0;
2904 }
2905 
2906 /**
2907  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2908  *
2909  * @work: work_struct.
2910  */
2911 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2912 {
2913 	struct amdgpu_device *adev =
2914 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2915 	int r;
2916 
2917 	r = amdgpu_ib_ring_tests(adev);
2918 	if (r)
2919 		DRM_ERROR("ib ring test failed (%d).\n", r);
2920 }
2921 
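/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler to enable GFXOFF
 *
 * @work: work_struct.
 *
 * Asks the SMU to enable GFXOFF once the delay has expired and no GFXOFF
 * requests are outstanding.
 */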
2922 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2923 {
2924 	struct amdgpu_device *adev =
2925 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2926 
2927 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2928 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2929 
2930 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2931 		adev->gfx.gfx_off_state = true;
2932 }
2933 
2934 /**
2935  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2936  *
2937  * @adev: amdgpu_device pointer
2938  *
 * First suspend pass for hardware IPs.  Clockgating and powergating are
 * disabled and the suspend callbacks are run for the display (DCE) blocks
 * only; the remaining blocks are handled in phase 2.  suspend puts the
 * hardware and software state of each IP into a state suitable for suspend.
2943  * Returns 0 on success, negative error code on failure.
2944  */
2945 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2946 {
2947 	int i, r;
2948 
2949 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2950 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2951 
2952 	/*
	 * Per the PMFW team's suggestion, the driver needs to handle disabling
	 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
	 * scenario.  Add the missing df cstate disablement here.
2956 	 */
2957 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2958 		dev_warn(adev->dev, "Failed to disallow df cstate");
2959 
2960 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2961 		if (!adev->ip_blocks[i].status.valid)
2962 			continue;
2963 
2964 		/* displays are handled separately */
2965 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2966 			continue;
2967 
2969 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2970 		/* XXX handle errors */
2971 		if (r) {
2972 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2973 				  adev->ip_blocks[i].version->funcs->name, r);
2974 			return r;
2975 		}
2976 
2977 		adev->ip_blocks[i].status.hw = false;
2978 	}
2979 
2980 	return 0;
2981 }
2982 
2983 /**
2984  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2985  *
2986  * @adev: amdgpu_device pointer
2987  *
 * Second suspend pass for hardware IPs.  The suspend callbacks are run for
 * all blocks except the display (DCE) blocks, which were handled in phase 1.
 * suspend puts the hardware and software state of each IP into a state
 * suitable for suspend.
2992  * Returns 0 on success, negative error code on failure.
2993  */
2994 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2995 {
2996 	int i, r;
2997 
2998 	if (adev->in_s0ix)
2999 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3000 
3001 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3002 		if (!adev->ip_blocks[i].status.valid)
3003 			continue;
3004 		/* displays are handled in phase1 */
3005 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3006 			continue;
3007 		/* PSP lost connection when err_event_athub occurs */
3008 		if (amdgpu_ras_intr_triggered() &&
3009 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3010 			adev->ip_blocks[i].status.hw = false;
3011 			continue;
3012 		}
3013 
		/* skip unnecessary suspend if we have not initialized them yet */
3015 		if (adev->gmc.xgmi.pending_reset &&
3016 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3017 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3018 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3019 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3020 			adev->ip_blocks[i].status.hw = false;
3021 			continue;
3022 		}
3023 
3024 		/* skip suspend of gfx/mes and psp for S0ix
3025 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3026 		 * like at runtime. PSP is also part of the always on hardware
3027 		 * so no need to suspend it.
3028 		 */
3029 		if (adev->in_s0ix &&
3030 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3031 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3032 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3033 			continue;
3034 
3035 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3036 		if (adev->in_s0ix &&
3037 		    (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3038 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3039 			continue;
3040 
		/* During cold boot, swPSP provides the IMU and RLC FW binaries to the
		 * TOS.  These live in the TMR and are expected to be reused by the
		 * PSP-TOS to reload from that location; RLC Autoload is also loaded
		 * from there automatically, based on the PMFW -> PSP message during
		 * the re-init sequence.  Therefore, the psp suspend & resume should
		 * be skipped to avoid destroying the TMR and reloading the FWs again
		 * for IMU-enabled APU ASICs.
		 */
3048 		if (amdgpu_in_reset(adev) &&
3049 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3050 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3051 			continue;
3052 
3054 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3055 		/* XXX handle errors */
3056 		if (r) {
3057 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3058 				  adev->ip_blocks[i].version->funcs->name, r);
3059 		}
3060 		adev->ip_blocks[i].status.hw = false;
3061 		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
3063 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3064 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3065 				if (r) {
3066 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3067 							adev->mp1_state, r);
3068 					return r;
3069 				}
3070 			}
3071 		}
3072 	}
3073 
3074 	return 0;
3075 }
3076 
3077 /**
3078  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3079  *
3080  * @adev: amdgpu_device pointer
3081  *
3082  * Main suspend function for hardware IPs.  The list of all the hardware
3083  * IPs that make up the asic is walked, clockgating is disabled and the
3084  * suspend callbacks are run.  suspend puts the hardware and software state
3085  * in each IP into a state suitable for suspend.
3086  * Returns 0 on success, negative error code on failure.
3087  */
3088 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3089 {
3090 	int r;
3091 
3092 	if (amdgpu_sriov_vf(adev)) {
3093 		amdgpu_virt_fini_data_exchange(adev);
3094 		amdgpu_virt_request_full_gpu(adev, false);
3095 	}
3096 
3097 	r = amdgpu_device_ip_suspend_phase1(adev);
3098 	if (r)
3099 		return r;
3100 	r = amdgpu_device_ip_suspend_phase2(adev);
3101 
3102 	if (amdgpu_sriov_vf(adev))
3103 		amdgpu_virt_release_full_gpu(adev, false);
3104 
3105 	return r;
3106 }
3107 
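/**
 * amdgpu_device_ip_reinit_early_sriov - reinit the early hardware IPs (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs hw_init for the early blocks (COMMON, GMC, PSP, IH), in that fixed
 * order, when re-initializing an SR-IOV VF.
 * Returns 0 on success, negative error code on failure.
 */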
3108 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3109 {
3110 	int i, r;
3111 
3112 	static enum amd_ip_block_type ip_order[] = {
3113 		AMD_IP_BLOCK_TYPE_COMMON,
3114 		AMD_IP_BLOCK_TYPE_GMC,
3115 		AMD_IP_BLOCK_TYPE_PSP,
3116 		AMD_IP_BLOCK_TYPE_IH,
3117 	};
3118 
3119 	for (i = 0; i < adev->num_ip_blocks; i++) {
3120 		int j;
3121 		struct amdgpu_ip_block *block;
3122 
3123 		block = &adev->ip_blocks[i];
3124 		block->status.hw = false;
3125 
3126 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3127 
3128 			if (block->version->type != ip_order[j] ||
3129 				!block->status.valid)
3130 				continue;
3131 
3132 			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3134 			if (r)
3135 				return r;
3136 			block->status.hw = true;
3137 		}
3138 	}
3139 
3140 	return 0;
3141 }
3142 
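/**
 * amdgpu_device_ip_reinit_late_sriov - reinit the remaining hardware IPs (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs hw_init (or resume, for the SMC block) for the remaining blocks in
 * a fixed order when re-initializing an SR-IOV VF.
 * Returns 0 on success, negative error code on failure.
 */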
3143 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3144 {
3145 	int i, r;
3146 
3147 	static enum amd_ip_block_type ip_order[] = {
3148 		AMD_IP_BLOCK_TYPE_SMC,
3149 		AMD_IP_BLOCK_TYPE_DCE,
3150 		AMD_IP_BLOCK_TYPE_GFX,
3151 		AMD_IP_BLOCK_TYPE_SDMA,
3152 		AMD_IP_BLOCK_TYPE_UVD,
3153 		AMD_IP_BLOCK_TYPE_VCE,
3154 		AMD_IP_BLOCK_TYPE_VCN
3155 	};
3156 
3157 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3158 		int j;
3159 		struct amdgpu_ip_block *block;
3160 
3161 		for (j = 0; j < adev->num_ip_blocks; j++) {
3162 			block = &adev->ip_blocks[j];
3163 
3164 			if (block->version->type != ip_order[i] ||
3165 				!block->status.valid ||
3166 				block->status.hw)
3167 				continue;
3168 
3169 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3170 				r = block->version->funcs->resume(adev);
3171 			else
3172 				r = block->version->funcs->hw_init(adev);
3173 
			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3175 			if (r)
3176 				return r;
3177 			block->status.hw = true;
3178 		}
3179 	}
3180 
3181 	return 0;
3182 }
3183 
3184 /**
3185  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3186  *
3187  * @adev: amdgpu_device pointer
3188  *
3189  * First resume function for hardware IPs.  The list of all the hardware
3190  * IPs that make up the asic is walked and the resume callbacks are run for
 * COMMON, GMC, IH, and PSP (on SR-IOV).  resume puts the hardware into a
 * functional state after a suspend and updates the software state as
 * necessary.  This function is also used for restoring the GPU after a GPU
 * reset.
3194  * Returns 0 on success, negative error code on failure.
3195  */
3196 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3197 {
3198 	int i, r;
3199 
3200 	for (i = 0; i < adev->num_ip_blocks; i++) {
3201 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3202 			continue;
3203 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3204 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3205 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3206 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3207 
3208 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3209 			if (r) {
3210 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3211 					  adev->ip_blocks[i].version->funcs->name, r);
3212 				return r;
3213 			}
3214 			adev->ip_blocks[i].status.hw = true;
3215 		}
3216 	}
3217 
3218 	return 0;
3219 }
3220 
3221 /**
3222  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3223  *
3224  * @adev: amdgpu_device pointer
3225  *
 * Second resume function for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
3229  * functional state after a suspend and updates the software state as
3230  * necessary.  This function is also used for restoring the GPU after a GPU
3231  * reset.
3232  * Returns 0 on success, negative error code on failure.
3233  */
3234 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3235 {
3236 	int i, r;
3237 
3238 	for (i = 0; i < adev->num_ip_blocks; i++) {
3239 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3240 			continue;
3241 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3242 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3243 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3244 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3245 			continue;
3246 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3247 		if (r) {
3248 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3249 				  adev->ip_blocks[i].version->funcs->name, r);
3250 			return r;
3251 		}
3252 		adev->ip_blocks[i].status.hw = true;
3253 	}
3254 
3255 	return 0;
3256 }
3257 
3258 /**
3259  * amdgpu_device_ip_resume - run resume for hardware IPs
3260  *
3261  * @adev: amdgpu_device pointer
3262  *
3263  * Main resume function for hardware IPs.  The hardware IPs
3264  * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them.  In this case (S3/S4) they are
3267  * run sequentially.
3268  * Returns 0 on success, negative error code on failure.
3269  */
3270 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3271 {
3272 	int r;
3273 
3274 	r = amdgpu_amdkfd_resume_iommu(adev);
3275 	if (r)
3276 		return r;
3277 
3278 	r = amdgpu_device_ip_resume_phase1(adev);
3279 	if (r)
3280 		return r;
3281 
3282 	r = amdgpu_device_fw_loading(adev);
3283 	if (r)
3284 		return r;
3285 
3286 	r = amdgpu_device_ip_resume_phase2(adev);
3287 
3288 	return r;
3289 }
3290 
3291 /**
3292  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3293  *
3294  * @adev: amdgpu_device pointer
3295  *
3296  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3297  */
3298 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3299 {
3300 	if (amdgpu_sriov_vf(adev)) {
3301 		if (adev->is_atom_fw) {
3302 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3303 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3304 		} else {
3305 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3306 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3307 		}
3308 
3309 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3310 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3311 	}
3312 }
3313 
3314 /**
3315  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3316  *
3317  * @asic_type: AMD asic type
3318  *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
3320  * returns true if DC has support, false if not.
3321  */
3322 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3323 {
3324 	switch (asic_type) {
3325 #ifdef CONFIG_DRM_AMDGPU_SI
3326 	case CHIP_HAINAN:
3327 #endif
3328 	case CHIP_TOPAZ:
3329 		/* chips with no display hardware */
3330 		return false;
3331 #if defined(CONFIG_DRM_AMD_DC)
3332 	case CHIP_TAHITI:
3333 	case CHIP_PITCAIRN:
3334 	case CHIP_VERDE:
3335 	case CHIP_OLAND:
3336 		/*
3337 		 * We have systems in the wild with these ASICs that require
3338 		 * LVDS and VGA support which is not supported with DC.
3339 		 *
3340 		 * Fallback to the non-DC driver here by default so as not to
3341 		 * cause regressions.
3342 		 */
3343 #if defined(CONFIG_DRM_AMD_DC_SI)
3344 		return amdgpu_dc > 0;
3345 #else
3346 		return false;
3347 #endif
3348 	case CHIP_BONAIRE:
3349 	case CHIP_KAVERI:
3350 	case CHIP_KABINI:
3351 	case CHIP_MULLINS:
3352 		/*
3353 		 * We have systems in the wild with these ASICs that require
3354 		 * VGA support which is not supported with DC.
3355 		 *
3356 		 * Fallback to the non-DC driver here by default so as not to
3357 		 * cause regressions.
3358 		 */
3359 		return amdgpu_dc > 0;
3360 	default:
3361 		return amdgpu_dc != 0;
3362 #else
3363 	default:
3364 		if (amdgpu_dc > 0)
3365 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3366 					 "but isn't supported by ASIC, ignoring\n");
3367 		return false;
3368 #endif
3369 	}
3370 }
3371 
3372 /**
3373  * amdgpu_device_has_dc_support - check if dc is supported
3374  *
3375  * @adev: amdgpu_device pointer
3376  *
3377  * Returns true for supported, false for not supported
3378  */
3379 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3380 {
3381 	if (adev->enable_virtual_display ||
3382 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3383 		return false;
3384 
3385 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3386 }
3387 
3388 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3389 {
3390 	struct amdgpu_device *adev =
3391 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3392 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3393 
3394 	/* It's a bug to not have a hive within this function */
3395 	if (WARN_ON(!hive))
3396 		return;
3397 
3398 	/*
3399 	 * Use task barrier to synchronize all xgmi reset works across the
3400 	 * hive. task_barrier_enter and task_barrier_exit will block
3401 	 * until all the threads running the xgmi reset works reach
3402 	 * those points. task_barrier_full will do both blocks.
3403 	 */
3404 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3405 
3406 		task_barrier_enter(&hive->tb);
3407 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3408 
3409 		if (adev->asic_reset_res)
3410 			goto fail;
3411 
3412 		task_barrier_exit(&hive->tb);
3413 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3414 
3415 		if (adev->asic_reset_res)
3416 			goto fail;
3417 
3418 		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3419 		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3420 			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3421 	} else {
3422 
3423 		task_barrier_full(&hive->tb);
3424 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3425 	}
3426 
3427 fail:
3428 	if (adev->asic_reset_res)
3429 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3430 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3431 	amdgpu_put_xgmi_hive(hive);
3432 }
3433 
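/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup timeout parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the per-engine job timeouts from the amdgpu.lockup_timeout module
 * parameter, falling back to the defaults when it is not set.
 * Returns 0 on success, negative error code if parsing fails.
 */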
3434 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3435 {
3436 	char *input = amdgpu_lockup_timeout;
3437 	char *timeout_setting = NULL;
3438 	int index = 0;
3439 	long timeout;
3440 	int ret = 0;
3441 
3442 	/*
	 * By default the timeout for non-compute jobs is 10000 ms
	 * and 60000 ms for compute jobs.
	 * In SR-IOV or passthrough mode, the timeout for compute
	 * jobs is 60000 ms by default.
3447 	 */
3448 	adev->gfx_timeout = msecs_to_jiffies(10000);
3449 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3450 	if (amdgpu_sriov_vf(adev))
3451 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3452 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3453 	else
		adev->compute_timeout = msecs_to_jiffies(60000);
3455 
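	/* Values apply in the order: gfx, compute, sdma, video */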
3456 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3457 		while ((timeout_setting = strsep(&input, ",")) &&
3458 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3459 			ret = kstrtol(timeout_setting, 0, &timeout);
3460 			if (ret)
3461 				return ret;
3462 
3463 			if (timeout == 0) {
3464 				index++;
3465 				continue;
3466 			} else if (timeout < 0) {
3467 				timeout = MAX_SCHEDULE_TIMEOUT;
3468 				dev_warn(adev->dev, "lockup timeout disabled");
3469 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3470 			} else {
3471 				timeout = msecs_to_jiffies(timeout);
3472 			}
3473 
3474 			switch (index++) {
3475 			case 0:
3476 				adev->gfx_timeout = timeout;
3477 				break;
3478 			case 1:
3479 				adev->compute_timeout = timeout;
3480 				break;
3481 			case 2:
3482 				adev->sdma_timeout = timeout;
3483 				break;
3484 			case 3:
3485 				adev->video_timeout = timeout;
3486 				break;
3487 			default:
3488 				break;
3489 			}
3490 		}
3491 		/*
3492 		 * There is only one value specified and
3493 		 * it should apply to all non-compute jobs.
3494 		 */
3495 		if (index == 1) {
3496 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3497 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3498 				adev->compute_timeout = adev->gfx_timeout;
3499 		}
3500 	}
3501 
3502 	return ret;
3503 }
3504 
3505 /**
3506  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3507  *
3508  * @adev: amdgpu_device pointer
3509  *
 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in
 * passthrough mode.
3511  */
3512 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3513 {
3514 	struct iommu_domain *domain;
3515 
3516 	domain = iommu_get_domain_for_dev(adev->dev);
3517 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3518 		adev->ram_is_direct_mapped = true;
3519 }
3520 
3521 static const struct attribute *amdgpu_dev_attributes[] = {
3522 	&dev_attr_product_name.attr,
3523 	&dev_attr_product_number.attr,
3524 	&dev_attr_serial_number.attr,
3525 	&dev_attr_pcie_replay_count.attr,
3526 	NULL
3527 };
3528 
3529 /**
3530  * amdgpu_device_init - initialize the driver
3531  *
3532  * @adev: amdgpu_device pointer
3533  * @flags: driver flags
3534  *
3535  * Initializes the driver info and hw (all asics).
3536  * Returns 0 for success or an error on failure.
3537  * Called at driver startup.
3538  */
3539 int amdgpu_device_init(struct amdgpu_device *adev,
3540 		       uint32_t flags)
3541 {
3542 	struct drm_device *ddev = adev_to_drm(adev);
3543 	struct pci_dev *pdev = adev->pdev;
3544 	int r, i;
3545 	bool px = false;
3546 	u32 max_MBps;
3547 
3548 	adev->shutdown = false;
3549 	adev->flags = flags;
3550 
3551 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3552 		adev->asic_type = amdgpu_force_asic_type;
3553 	else
3554 		adev->asic_type = flags & AMD_ASIC_MASK;
3555 
3556 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3557 	if (amdgpu_emu_mode == 1)
3558 		adev->usec_timeout *= 10;
3559 	adev->gmc.gart_size = 512 * 1024 * 1024;
3560 	adev->accel_working = false;
3561 	adev->num_rings = 0;
3562 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3563 	adev->mman.buffer_funcs = NULL;
3564 	adev->mman.buffer_funcs_ring = NULL;
3565 	adev->vm_manager.vm_pte_funcs = NULL;
3566 	adev->vm_manager.vm_pte_num_scheds = 0;
3567 	adev->gmc.gmc_funcs = NULL;
3568 	adev->harvest_ip_mask = 0x0;
3569 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3570 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3571 
3572 	adev->smc_rreg = &amdgpu_invalid_rreg;
3573 	adev->smc_wreg = &amdgpu_invalid_wreg;
3574 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3575 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3576 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3577 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3578 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3579 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3580 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3581 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3582 	adev->didt_rreg = &amdgpu_invalid_rreg;
3583 	adev->didt_wreg = &amdgpu_invalid_wreg;
3584 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3585 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3586 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3587 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3588 
3589 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3590 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3591 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3592 
	/* all mutex initialization is done here so that functions
	 * can be called later without locking issues */
3595 	mutex_init(&adev->firmware.mutex);
3596 	mutex_init(&adev->pm.mutex);
3597 	mutex_init(&adev->gfx.gpu_clock_mutex);
3598 	mutex_init(&adev->srbm_mutex);
3599 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3600 	mutex_init(&adev->gfx.gfx_off_mutex);
3601 	mutex_init(&adev->grbm_idx_mutex);
3602 	mutex_init(&adev->mn_lock);
3603 	mutex_init(&adev->virt.vf_errors.lock);
3604 	hash_init(adev->mn_hash);
3605 	mutex_init(&adev->psp.mutex);
3606 	mutex_init(&adev->notifier_lock);
3607 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3608 	mutex_init(&adev->benchmark_mutex);
3609 
3610 	amdgpu_device_init_apu_flags(adev);
3611 
3612 	r = amdgpu_device_check_arguments(adev);
3613 	if (r)
3614 		return r;
3615 
3616 	spin_lock_init(&adev->mmio_idx_lock);
3617 	spin_lock_init(&adev->smc_idx_lock);
3618 	spin_lock_init(&adev->pcie_idx_lock);
3619 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3620 	spin_lock_init(&adev->didt_idx_lock);
3621 	spin_lock_init(&adev->gc_cac_idx_lock);
3622 	spin_lock_init(&adev->se_cac_idx_lock);
3623 	spin_lock_init(&adev->audio_endpt_idx_lock);
3624 	spin_lock_init(&adev->mm_stats.lock);
3625 
3626 	INIT_LIST_HEAD(&adev->shadow_list);
3627 	mutex_init(&adev->shadow_list_lock);
3628 
3629 	INIT_LIST_HEAD(&adev->reset_list);
3630 
3631 	INIT_LIST_HEAD(&adev->ras_list);
3632 
3633 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3634 			  amdgpu_device_delayed_init_work_handler);
3635 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3636 			  amdgpu_device_delay_enable_gfx_off);
3637 
3638 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3639 
3640 	adev->gfx.gfx_off_req_count = 1;
3641 	adev->gfx.gfx_off_residency = 0;
3642 	adev->gfx.gfx_off_entrycount = 0;
3643 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3644 
3645 	atomic_set(&adev->throttling_logging_enabled, 1);
3646 	/*
3647 	 * If throttling continues, logging will be performed every minute
3648 	 * to avoid log flooding. "-1" is subtracted since the thermal
3649 	 * throttling interrupt comes every second. Thus, the total logging
3650 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3651 	 * for throttling interrupt) = 60 seconds.
3652 	 */
3653 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3654 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3655 
3656 	/* Registers mapping */
3657 	/* TODO: block userspace mapping of io register */
3658 	if (adev->asic_type >= CHIP_BONAIRE) {
3659 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3660 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3661 	} else {
3662 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3663 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3664 	}
3665 
3666 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3667 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3668 
	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
	if (!adev->rmmio)
		return -ENOMEM;
3673 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3674 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3675 
3676 	amdgpu_device_get_pcie_info(adev);
3677 
3678 	if (amdgpu_mcbp)
3679 		DRM_INFO("MCBP is enabled\n");
3680 
3681 	/*
3682 	 * Reset domain needs to be present early, before XGMI hive discovered
3683 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
3684 	 * early on during init and before calling to RREG32.
3685 	 */
3686 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3687 	if (!adev->reset_domain)
3688 		return -ENOMEM;
3689 
3690 	/* detect hw virtualization here */
3691 	amdgpu_detect_virtualization(adev);
3692 
3693 	r = amdgpu_device_get_job_timeout_settings(adev);
3694 	if (r) {
3695 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3696 		return r;
3697 	}
3698 
3699 	/* early init functions */
3700 	r = amdgpu_device_ip_early_init(adev);
3701 	if (r)
3702 		return r;
3703 
3704 	/* Get rid of things like offb */
3705 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3706 	if (r)
3707 		return r;
3708 
3709 	/* Enable TMZ based on IP_VERSION */
3710 	amdgpu_gmc_tmz_set(adev);
3711 
3712 	amdgpu_gmc_noretry_set(adev);
	/* Need to get xgmi info early to decide the reset behavior */
3714 	if (adev->gmc.xgmi.supported) {
3715 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
3716 		if (r)
3717 			return r;
3718 	}
3719 
3720 	/* enable PCIE atomic ops */
3721 	if (amdgpu_sriov_vf(adev))
3722 		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3723 			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3724 			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3725 	else
3726 		adev->have_atomics_support =
3727 			!pci_enable_atomic_ops_to_root(adev->pdev,
3728 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3729 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3730 	if (!adev->have_atomics_support)
3731 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3732 
	/* doorbell bar mapping and doorbell index init */
3734 	amdgpu_device_doorbell_init(adev);
3735 
3736 	if (amdgpu_emu_mode == 1) {
3737 		/* post the asic on emulation mode */
3738 		emu_soc_asic_init(adev);
3739 		goto fence_driver_init;
3740 	}
3741 
3742 	amdgpu_reset_init(adev);
3743 
3744 	/* detect if we are with an SRIOV vbios */
3745 	amdgpu_device_detect_sriov_bios(adev);
3746 
3747 	/* check if we need to reset the asic
3748 	 *  E.g., driver was not cleanly unloaded previously, etc.
3749 	 */
3750 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3751 		if (adev->gmc.xgmi.num_physical_nodes) {
3752 			dev_info(adev->dev, "Pending hive reset.\n");
3753 			adev->gmc.xgmi.pending_reset = true;
			/* Only need to init the necessary blocks for the SMU to handle the reset */
3755 			for (i = 0; i < adev->num_ip_blocks; i++) {
3756 				if (!adev->ip_blocks[i].status.valid)
3757 					continue;
3758 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3759 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3760 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3761 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3762 					DRM_DEBUG("IP %s disabled for hw_init.\n",
3763 						adev->ip_blocks[i].version->funcs->name);
3764 					adev->ip_blocks[i].status.hw = true;
3765 				}
3766 			}
3767 		} else {
3768 			r = amdgpu_asic_reset(adev);
3769 			if (r) {
3770 				dev_err(adev->dev, "asic reset on init failed\n");
3771 				goto failed;
3772 			}
3773 		}
3774 	}
3775 
3776 	pci_enable_pcie_error_reporting(adev->pdev);
3777 
3778 	/* Post card if necessary */
3779 	if (amdgpu_device_need_post(adev)) {
3780 		if (!adev->bios) {
3781 			dev_err(adev->dev, "no vBIOS found\n");
3782 			r = -EINVAL;
3783 			goto failed;
3784 		}
3785 		DRM_INFO("GPU posting now...\n");
3786 		r = amdgpu_device_asic_init(adev);
3787 		if (r) {
3788 			dev_err(adev->dev, "gpu post error!\n");
3789 			goto failed;
3790 		}
3791 	}
3792 
3793 	if (adev->is_atom_fw) {
3794 		/* Initialize clocks */
3795 		r = amdgpu_atomfirmware_get_clock_info(adev);
3796 		if (r) {
3797 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3798 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3799 			goto failed;
3800 		}
3801 	} else {
3802 		/* Initialize clocks */
3803 		r = amdgpu_atombios_get_clock_info(adev);
3804 		if (r) {
3805 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3806 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3807 			goto failed;
3808 		}
3809 		/* init i2c buses */
3810 		if (!amdgpu_device_has_dc_support(adev))
3811 			amdgpu_atombios_i2c_init(adev);
3812 	}
3813 
3814 fence_driver_init:
3815 	/* Fence driver */
3816 	r = amdgpu_fence_driver_sw_init(adev);
3817 	if (r) {
3818 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3819 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3820 		goto failed;
3821 	}
3822 
3823 	/* init the mode config */
3824 	drm_mode_config_init(adev_to_drm(adev));
3825 
3826 	r = amdgpu_device_ip_init(adev);
3827 	if (r) {
3828 		/* failed in exclusive mode due to timeout */
3829 		if (amdgpu_sriov_vf(adev) &&
3830 		    !amdgpu_sriov_runtime(adev) &&
3831 		    amdgpu_virt_mmio_blocked(adev) &&
3832 		    !amdgpu_virt_wait_reset(adev)) {
3833 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3834 			/* Don't send request since VF is inactive. */
3835 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3836 			adev->virt.ops = NULL;
3837 			r = -EAGAIN;
3838 			goto release_ras_con;
3839 		}
3840 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3841 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3842 		goto release_ras_con;
3843 	}
3844 
3845 	amdgpu_fence_driver_hw_init(adev);
3846 
3847 	dev_info(adev->dev,
3848 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3849 			adev->gfx.config.max_shader_engines,
3850 			adev->gfx.config.max_sh_per_se,
3851 			adev->gfx.config.max_cu_per_sh,
3852 			adev->gfx.cu_info.number);
3853 
3854 	adev->accel_working = true;
3855 
3856 	amdgpu_vm_check_compute_bug(adev);
3857 
3858 	/* Initialize the buffer migration limit. */
3859 	if (amdgpu_moverate >= 0)
3860 		max_MBps = amdgpu_moverate;
3861 	else
3862 		max_MBps = 8; /* Allow 8 MB/s. */
3863 	/* Get a log2 for easy divisions. */
3864 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
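	/*
	 * e.g. the default of 8 MB/s gives log2_max_MBps = 3, so the
	 * accounting code can divide by the rate with a right shift
	 * instead of a division.
	 */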
3865 
3866 	r = amdgpu_pm_sysfs_init(adev);
3867 	if (r) {
3868 		adev->pm_sysfs_en = false;
3869 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3870 	} else
3871 		adev->pm_sysfs_en = true;
3872 
3873 	r = amdgpu_ucode_sysfs_init(adev);
3874 	if (r) {
3875 		adev->ucode_sysfs_en = false;
3876 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3877 	} else
3878 		adev->ucode_sysfs_en = true;
3879 
3880 	r = amdgpu_psp_sysfs_init(adev);
3881 	if (r) {
3882 		adev->psp_sysfs_en = false;
3883 		if (!amdgpu_sriov_vf(adev))
3884 			DRM_ERROR("Creating psp sysfs failed\n");
3885 	} else
3886 		adev->psp_sysfs_en = true;
3887 
3888 	/*
3889 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3890 	 * Otherwise the mgpu fan boost feature will be skipped due to the
3891 	 * gpu instance is counted less.
3892 	 */
3893 	amdgpu_register_gpu_instance(adev);
3894 
3895 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3896 	 * explicit gating rather than handling it automatically.
3897 	 */
3898 	if (!adev->gmc.xgmi.pending_reset) {
3899 		r = amdgpu_device_ip_late_init(adev);
3900 		if (r) {
3901 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3902 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3903 			goto release_ras_con;
3904 		}
3905 		/* must succeed. */
3906 		amdgpu_ras_resume(adev);
3907 		queue_delayed_work(system_wq, &adev->delayed_init_work,
3908 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3909 	}
3910 
3911 	if (amdgpu_sriov_vf(adev))
3912 		flush_delayed_work(&adev->delayed_init_work);
3913 
3914 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3915 	if (r)
3916 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3917 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3922 
	/* Keep the stored PCI config space at hand to restore it after a sudden PCI error */
3924 	if (amdgpu_device_cache_pci_state(adev->pdev))
3925 		pci_restore_state(pdev);
3926 
	/*
	 * If we have more than one VGA card, then disable the amdgpu VGA
	 * resources. This will fail for cards that aren't VGA class devices;
	 * just ignore it.
	 */
3930 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3931 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3932 
3933 	if (amdgpu_device_supports_px(ddev)) {
3934 		px = true;
3935 		vga_switcheroo_register_client(adev->pdev,
3936 					       &amdgpu_switcheroo_ops, px);
3937 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3938 	}
3939 
3940 	if (adev->gmc.xgmi.pending_reset)
3941 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3942 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3943 
3944 	amdgpu_device_check_iommu_direct_map(adev);
3945 
3946 	return 0;
3947 
3948 release_ras_con:
3949 	amdgpu_release_ras_context(adev);
3950 
3951 failed:
3952 	amdgpu_vf_error_trans_all(adev);
3953 
3954 	return r;
3955 }
3956 
3957 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3958 {
3959 
3960 	/* Clear all CPU mappings pointing to this device */
3961 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3962 
3963 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
3964 	amdgpu_device_doorbell_fini(adev);
3965 
3966 	iounmap(adev->rmmio);
3967 	adev->rmmio = NULL;
3968 	if (adev->mman.aper_base_kaddr)
3969 		iounmap(adev->mman.aper_base_kaddr);
3970 	adev->mman.aper_base_kaddr = NULL;
3971 
3972 	/* Memory manager related */
3973 	if (!adev->gmc.xgmi.connected_to_cpu) {
3974 		arch_phys_wc_del(adev->gmc.vram_mtrr);
3975 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3976 	}
3977 }
3978 
3979 /**
3980  * amdgpu_device_fini_hw - tear down the driver
3981  *
3982  * @adev: amdgpu_device pointer
3983  *
3984  * Tear down the driver info (all asics).
3985  * Called at driver shutdown.
3986  */
3987 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3988 {
3989 	dev_info(adev->dev, "amdgpu: finishing device.\n");
3990 	flush_delayed_work(&adev->delayed_init_work);
3991 	adev->shutdown = true;
3992 
	/* make sure the IB test has finished before entering exclusive mode
	 * to avoid preemption during the IB test
	 */
3996 	if (amdgpu_sriov_vf(adev)) {
3997 		amdgpu_virt_request_full_gpu(adev, false);
3998 		amdgpu_virt_fini_data_exchange(adev);
3999 	}
4000 
4001 	/* disable all interrupts */
4002 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
4004 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4005 			drm_helper_force_disable_all(adev_to_drm(adev));
4006 		else
4007 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4008 	}
4009 	amdgpu_fence_driver_hw_fini(adev);
4010 
4011 	if (adev->mman.initialized)
4012 		drain_workqueue(adev->mman.bdev.wq);
4013 
4014 	if (adev->pm_sysfs_en)
4015 		amdgpu_pm_sysfs_fini(adev);
4016 	if (adev->ucode_sysfs_en)
4017 		amdgpu_ucode_sysfs_fini(adev);
4018 	if (adev->psp_sysfs_en)
4019 		amdgpu_psp_sysfs_fini(adev);
4020 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4021 
4022 	/* disable ras feature must before hw fini */
4023 	amdgpu_ras_pre_fini(adev);
4024 
4025 	amdgpu_device_ip_fini_early(adev);
4026 
4027 	amdgpu_irq_fini_hw(adev);
4028 
4029 	if (adev->mman.initialized)
4030 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4031 
4032 	amdgpu_gart_dummy_page_fini(adev);
4033 
4034 	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4035 		amdgpu_device_unmap_mmio(adev);
4036 
4037 }
4038 
4039 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4040 {
4041 	int idx;
4042 
4043 	amdgpu_fence_driver_sw_fini(adev);
4044 	amdgpu_device_ip_fini(adev);
4045 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4046 	adev->accel_working = false;
4047 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4048 
4049 	amdgpu_reset_fini(adev);
4050 
4051 	/* free i2c buses */
4052 	if (!amdgpu_device_has_dc_support(adev))
4053 		amdgpu_i2c_fini(adev);
4054 
4055 	if (amdgpu_emu_mode != 1)
4056 		amdgpu_atombios_fini(adev);
4057 
4058 	kfree(adev->bios);
4059 	adev->bios = NULL;
4060 	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
4061 		vga_switcheroo_unregister_client(adev->pdev);
4062 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4063 	}
4064 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4065 		vga_client_unregister(adev->pdev);
4066 
4067 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4068 
4069 		iounmap(adev->rmmio);
4070 		adev->rmmio = NULL;
4071 		amdgpu_device_doorbell_fini(adev);
4072 		drm_dev_exit(idx);
4073 	}
4074 
4075 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4076 		amdgpu_pmu_fini(adev);
4077 	if (adev->mman.discovery_bin)
4078 		amdgpu_discovery_fini(adev);
4079 
4080 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4081 	adev->reset_domain = NULL;
4082 
4083 	kfree(adev->pci_state);
4084 
4085 }
4086 
4087 /**
4088  * amdgpu_device_evict_resources - evict device resources
4089  * @adev: amdgpu device object
4090  *
 * Evicts all TTM device resources (VRAM BOs, GART table) from the LRU list
 * of the VRAM memory type. Mainly used for evicting device resources
 * at suspend time.
4094  *
4095  */
4096 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4097 {
4098 	int ret;
4099 
4100 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4101 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4102 		return 0;
4103 
4104 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4105 	if (ret)
4106 		DRM_WARN("evicting device resources failed\n");
4107 	return ret;
4108 }
4109 
4110 /*
4111  * Suspend & resume.
4112  */
4113 /**
4114  * amdgpu_device_suspend - initiate device suspend
4115  *
4116  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
4118  *
4119  * Puts the hw in the suspend state (all asics).
4120  * Returns 0 for success or an error on failure.
4121  * Called at driver suspend.
4122  */
4123 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4124 {
4125 	struct amdgpu_device *adev = drm_to_adev(dev);
4126 	int r = 0;
4127 
4128 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4129 		return 0;
4130 
4131 	adev->in_suspend = true;
4132 
4133 	/* Evict the majority of BOs before grabbing the full access */
4134 	r = amdgpu_device_evict_resources(adev);
4135 	if (r)
4136 		return r;
4137 
4138 	if (amdgpu_sriov_vf(adev)) {
4139 		amdgpu_virt_fini_data_exchange(adev);
4140 		r = amdgpu_virt_request_full_gpu(adev, false);
4141 		if (r)
4142 			return r;
4143 	}
4144 
4145 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4146 		DRM_WARN("smart shift update failed\n");
4147 
4148 	drm_kms_helper_poll_disable(dev);
4149 
4150 	if (fbcon)
4151 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4152 
4153 	cancel_delayed_work_sync(&adev->delayed_init_work);
4154 
4155 	amdgpu_ras_suspend(adev);
4156 
4157 	amdgpu_device_ip_suspend_phase1(adev);
4158 
4159 	if (!adev->in_s0ix)
4160 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4161 
4162 	r = amdgpu_device_evict_resources(adev);
4163 	if (r)
4164 		return r;
4165 
4166 	amdgpu_fence_driver_hw_fini(adev);
4167 
4168 	amdgpu_device_ip_suspend_phase2(adev);
4169 
4170 	if (amdgpu_sriov_vf(adev))
4171 		amdgpu_virt_release_full_gpu(adev, false);
4172 
4173 	return 0;
4174 }
4175 
4176 /**
4177  * amdgpu_device_resume - initiate device resume
4178  *
4179  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
4181  *
4182  * Bring the hw back to operating state (all asics).
4183  * Returns 0 for success or an error on failure.
4184  * Called at driver resume.
4185  */
4186 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4187 {
4188 	struct amdgpu_device *adev = drm_to_adev(dev);
4189 	int r = 0;
4190 
4191 	if (amdgpu_sriov_vf(adev)) {
4192 		r = amdgpu_virt_request_full_gpu(adev, true);
4193 		if (r)
4194 			return r;
4195 	}
4196 
4197 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4198 		return 0;
4199 
4200 	if (adev->in_s0ix)
4201 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4202 
4203 	/* post card */
4204 	if (amdgpu_device_need_post(adev)) {
4205 		r = amdgpu_device_asic_init(adev);
4206 		if (r)
4207 			dev_err(adev->dev, "amdgpu asic init failed\n");
4208 	}
4209 
4210 	r = amdgpu_device_ip_resume(adev);
4211 
4212 	if (r) {
4213 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4214 		goto exit;
4215 	}
4216 	amdgpu_fence_driver_hw_init(adev);
4217 
4218 	r = amdgpu_device_ip_late_init(adev);
4219 	if (r)
4220 		goto exit;
4221 
4222 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4223 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4224 
4225 	if (!adev->in_s0ix) {
4226 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4227 		if (r)
4228 			goto exit;
4229 	}
4230 
4231 exit:
4232 	if (amdgpu_sriov_vf(adev)) {
4233 		amdgpu_virt_init_data_exchange(adev);
4234 		amdgpu_virt_release_full_gpu(adev, true);
4235 	}
4236 
4237 	if (r)
4238 		return r;
4239 
4240 	/* Make sure IB tests flushed */
4241 	flush_delayed_work(&adev->delayed_init_work);
4242 
4243 	if (fbcon)
4244 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4245 
4246 	drm_kms_helper_poll_enable(dev);
4247 
4248 	amdgpu_ras_resume(adev);
4249 
4250 	if (adev->mode_info.num_crtc) {
4251 		/*
4252 		 * Most of the connector probing functions try to acquire runtime pm
4253 		 * refs to ensure that the GPU is powered on when connector polling is
4254 		 * performed. Since we're calling this from a runtime PM callback,
4255 		 * trying to acquire rpm refs will cause us to deadlock.
4256 		 *
4257 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4258 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4259 		 */
4260 #ifdef CONFIG_PM
4261 		dev->dev->power.disable_depth++;
4262 #endif
4263 		if (!adev->dc_enabled)
4264 			drm_helper_hpd_irq_event(dev);
4265 		else
4266 			drm_kms_helper_hotplug_event(dev);
4267 #ifdef CONFIG_PM
4268 		dev->dev->power.disable_depth--;
4269 #endif
4270 	}
4271 	adev->in_suspend = false;
4272 
4273 	if (adev->enable_mes)
4274 		amdgpu_mes_self_test(adev);
4275 
4276 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4277 		DRM_WARN("smart shift update failed\n");
4278 
4279 	return 0;
4280 }
4281 
4282 /**
4283  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4284  *
4285  * @adev: amdgpu_device pointer
4286  *
4287  * The list of all the hardware IPs that make up the asic is walked and
4288  * the check_soft_reset callbacks are run.  check_soft_reset determines
4289  * if the asic is still hung or not.
4290  * Returns true if any of the IPs are still in a hung state, false if not.
4291  */
4292 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4293 {
4294 	int i;
4295 	bool asic_hang = false;
4296 
4297 	if (amdgpu_sriov_vf(adev))
4298 		return true;
4299 
4300 	if (amdgpu_asic_need_full_reset(adev))
4301 		return true;
4302 
4303 	for (i = 0; i < adev->num_ip_blocks; i++) {
4304 		if (!adev->ip_blocks[i].status.valid)
4305 			continue;
4306 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4307 			adev->ip_blocks[i].status.hang =
4308 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4309 		if (adev->ip_blocks[i].status.hang) {
4310 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4311 			asic_hang = true;
4312 		}
4313 	}
4314 	return asic_hang;
4315 }
4316 
4317 /**
4318  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4319  *
4320  * @adev: amdgpu_device pointer
4321  *
4322  * The list of all the hardware IPs that make up the asic is walked and the
4323  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4324  * handles any IP specific hardware or software state changes that are
4325  * necessary for a soft reset to succeed.
4326  * Returns 0 on success, negative error code on failure.
4327  */
4328 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4329 {
4330 	int i, r = 0;
4331 
4332 	for (i = 0; i < adev->num_ip_blocks; i++) {
4333 		if (!adev->ip_blocks[i].status.valid)
4334 			continue;
4335 		if (adev->ip_blocks[i].status.hang &&
4336 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4337 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4338 			if (r)
4339 				return r;
4340 		}
4341 	}
4342 
4343 	return 0;
4344 }
4345 
4346 /**
4347  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4348  *
4349  * @adev: amdgpu_device pointer
4350  *
4351  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4352  * reset is necessary to recover.
4353  * Returns true if a full asic reset is required, false if not.
4354  */
4355 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4356 {
4357 	int i;
4358 
4359 	if (amdgpu_asic_need_full_reset(adev))
4360 		return true;
4361 
4362 	for (i = 0; i < adev->num_ip_blocks; i++) {
4363 		if (!adev->ip_blocks[i].status.valid)
4364 			continue;
4365 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4366 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4367 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4368 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4369 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4370 			if (adev->ip_blocks[i].status.hang) {
4371 				dev_info(adev->dev, "Some block need full reset!\n");
4372 				return true;
4373 			}
4374 		}
4375 	}
4376 	return false;
4377 }
4378 
4379 /**
4380  * amdgpu_device_ip_soft_reset - do a soft reset
4381  *
4382  * @adev: amdgpu_device pointer
4383  *
4384  * The list of all the hardware IPs that make up the asic is walked and the
4385  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4386  * IP specific hardware or software state changes that are necessary to soft
4387  * reset the IP.
4388  * Returns 0 on success, negative error code on failure.
4389  */
4390 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4391 {
4392 	int i, r = 0;
4393 
4394 	for (i = 0; i < adev->num_ip_blocks; i++) {
4395 		if (!adev->ip_blocks[i].status.valid)
4396 			continue;
4397 		if (adev->ip_blocks[i].status.hang &&
4398 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4399 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4400 			if (r)
4401 				return r;
4402 		}
4403 	}
4404 
4405 	return 0;
4406 }
4407 
4408 /**
4409  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4410  *
4411  * @adev: amdgpu_device pointer
4412  *
4413  * The list of all the hardware IPs that make up the asic is walked and the
4414  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4415  * handles any IP specific hardware or software state changes that are
4416  * necessary after the IP has been soft reset.
4417  * Returns 0 on success, negative error code on failure.
4418  */
4419 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4420 {
4421 	int i, r = 0;
4422 
4423 	for (i = 0; i < adev->num_ip_blocks; i++) {
4424 		if (!adev->ip_blocks[i].status.valid)
4425 			continue;
4426 		if (adev->ip_blocks[i].status.hang &&
4427 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4428 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4429 		if (r)
4430 			return r;
4431 	}
4432 
4433 	return 0;
4434 }
4435 
4436 /**
4437  * amdgpu_device_recover_vram - Recover some VRAM contents
4438  *
4439  * @adev: amdgpu_device pointer
4440  *
4441  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4442  * restore things like GPUVM page tables after a GPU reset where
4443  * the contents of VRAM might be lost.
4444  *
4445  * Returns:
4446  * 0 on success, negative error code on failure.
4447  */
4448 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4449 {
4450 	struct dma_fence *fence = NULL, *next = NULL;
4451 	struct amdgpu_bo *shadow;
4452 	struct amdgpu_bo_vm *vmbo;
4453 	long r = 1, tmo;
4454 
4455 	if (amdgpu_sriov_runtime(adev))
4456 		tmo = msecs_to_jiffies(8000);
4457 	else
4458 		tmo = msecs_to_jiffies(100);
4459 
4460 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4461 	mutex_lock(&adev->shadow_list_lock);
4462 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4463 		shadow = &vmbo->bo;
4464 		/* No need to recover an evicted BO */
4465 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4466 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4467 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4468 			continue;
4469 
4470 		r = amdgpu_bo_restore_shadow(shadow, &next);
4471 		if (r)
4472 			break;
4473 
4474 		if (fence) {
4475 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4476 			dma_fence_put(fence);
4477 			fence = next;
4478 			if (tmo == 0) {
4479 				r = -ETIMEDOUT;
4480 				break;
4481 			} else if (tmo < 0) {
4482 				r = tmo;
4483 				break;
4484 			}
4485 		} else {
4486 			fence = next;
4487 		}
4488 	}
4489 	mutex_unlock(&adev->shadow_list_lock);
4490 
4491 	if (fence)
4492 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4493 	dma_fence_put(fence);
4494 
4495 	if (r < 0 || tmo <= 0) {
4496 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4497 		return -EIO;
4498 	}
4499 
4500 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4501 	return 0;
4502 }
4503 
4504 
4505 /**
4506  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4507  *
4508  * @adev: amdgpu_device pointer
4509  * @from_hypervisor: request from hypervisor
4510  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, a negative error code otherwise.
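 * Retryable failures are retried from the retry label up to
 * AMDGPU_MAX_RETRY_LIMIT times (see the AMDGPU_RETRY_SRIOV_RESET() check
 * at the end of this function).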
4513  */
4514 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4515 				     bool from_hypervisor)
4516 {
4517 	int r;
4518 	struct amdgpu_hive_info *hive = NULL;
4519 	int retry_limit = 0;
4520 
4521 retry:
4522 	amdgpu_amdkfd_pre_reset(adev);
4523 
4524 	if (from_hypervisor)
4525 		r = amdgpu_virt_request_full_gpu(adev, true);
4526 	else
4527 		r = amdgpu_virt_reset_gpu(adev);
4528 	if (r)
4529 		return r;
4530 
4531 	/* Resume IP prior to SMC */
4532 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4533 	if (r)
4534 		goto error;
4535 
4536 	amdgpu_virt_init_data_exchange(adev);
4537 
4538 	r = amdgpu_device_fw_loading(adev);
4539 	if (r)
4540 		return r;
4541 
4542 	/* now we are okay to resume SMC/CP/SDMA */
4543 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4544 	if (r)
4545 		goto error;
4546 
4547 	hive = amdgpu_get_xgmi_hive(adev);
4548 	/* Update PSP FW topology after reset */
4549 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4550 		r = amdgpu_xgmi_update_topology(hive, adev);
4551 
4552 	if (hive)
4553 		amdgpu_put_xgmi_hive(hive);
4554 
4555 	if (!r) {
4556 		amdgpu_irq_gpu_reset_resume_helper(adev);
4557 		r = amdgpu_ib_ring_tests(adev);
4558 
4559 		amdgpu_amdkfd_post_reset(adev);
4560 	}
4561 
4562 error:
4563 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4564 		amdgpu_inc_vram_lost(adev);
4565 		r = amdgpu_device_recover_vram(adev);
4566 	}
4567 	amdgpu_virt_release_full_gpu(adev, true);
4568 
4569 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4570 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4571 			retry_limit++;
4572 			goto retry;
4573 		} else
4574 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4575 	}
4576 
4577 	return r;
4578 }
4579 
4580 /**
 * amdgpu_device_has_job_running - check if there is any job in the pending list
 *
 * @adev: amdgpu_device pointer
 *
 * Check if any scheduler ring still has a job in its pending list.
4586  */
4587 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4588 {
4589 	int i;
4590 	struct drm_sched_job *job;
4591 
4592 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4593 		struct amdgpu_ring *ring = adev->rings[i];
4594 
4595 		if (!ring || !ring->sched.thread)
4596 			continue;
4597 
4598 		spin_lock(&ring->sched.job_list_lock);
4599 		job = list_first_entry_or_null(&ring->sched.pending_list,
4600 					       struct drm_sched_job, list);
4601 		spin_unlock(&ring->sched.job_list_lock);
4602 		if (job)
4603 			return true;
4604 	}
4605 	return false;
4606 }
4607 
4608 /**
4609  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4610  *
4611  * @adev: amdgpu_device pointer
4612  *
4613  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4614  * a hung GPU.
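 *
 * For illustration, based on the checks below: amdgpu_gpu_recovery == 0
 * always disables recovery; with the default of -1 (auto), recovery stays
 * enabled except on the older ASICs listed in the switch below; any other
 * value (and SR-IOV VFs) enables it.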
4615  */
4616 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4617 {
4618 
4619 	if (amdgpu_gpu_recovery == 0)
4620 		goto disabled;
4621 
4622 	/* Skip soft reset check in fatal error mode */
4623 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4624 		return true;
4625 
4626 	if (amdgpu_sriov_vf(adev))
4627 		return true;
4628 
4629 	if (amdgpu_gpu_recovery == -1) {
4630 		switch (adev->asic_type) {
4631 #ifdef CONFIG_DRM_AMDGPU_SI
4632 		case CHIP_VERDE:
4633 		case CHIP_TAHITI:
4634 		case CHIP_PITCAIRN:
4635 		case CHIP_OLAND:
4636 		case CHIP_HAINAN:
4637 #endif
4638 #ifdef CONFIG_DRM_AMDGPU_CIK
4639 		case CHIP_KAVERI:
4640 		case CHIP_KABINI:
4641 		case CHIP_MULLINS:
4642 #endif
4643 		case CHIP_CARRIZO:
4644 		case CHIP_STONEY:
4645 		case CHIP_CYAN_SKILLFISH:
4646 			goto disabled;
4647 		default:
4648 			break;
4649 		}
4650 	}
4651 
4652 	return true;
4653 
4654 disabled:
4655 		dev_info(adev->dev, "GPU recovery disabled.\n");
4656 		return false;
4657 }
4658 
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
{
	u32 i;
	int ret = 0;

	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* disable BM */
	pci_clear_master(adev->pdev);

	amdgpu_device_cache_pci_state(adev->pdev);

	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		dev_err(adev->dev, "GPU mode1 reset failed\n");

	amdgpu_device_load_pci_state(adev->pdev);

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
	return ret;
}
4698 
4699 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4700 				 struct amdgpu_reset_context *reset_context)
4701 {
4702 	int i, r = 0;
4703 	struct amdgpu_job *job = NULL;
4704 	bool need_full_reset =
4705 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4706 
4707 	if (reset_context->reset_req_dev == adev)
4708 		job = reset_context->job;
4709 
4710 	if (amdgpu_sriov_vf(adev)) {
4711 		/* stop the data exchange thread */
4712 		amdgpu_virt_fini_data_exchange(adev);
4713 	}
4714 
4715 	amdgpu_fence_driver_isr_toggle(adev, true);
4716 
4717 	/* block all schedulers and reset given job's ring */
4718 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4719 		struct amdgpu_ring *ring = adev->rings[i];
4720 
4721 		if (!ring || !ring->sched.thread)
4722 			continue;
4723 
		/* Clear the job fences from the fence driver to avoid
		 * force_completion on them; leave the NULL and VM flush
		 * fences in the fence driver.
		 */
4726 		amdgpu_fence_driver_clear_job_fences(ring);
4727 
4728 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4729 		amdgpu_fence_driver_force_completion(ring);
4730 	}
4731 
4732 	amdgpu_fence_driver_isr_toggle(adev, false);
4733 
4734 	if (job && job->vm)
4735 		drm_sched_increase_karma(&job->base);
4736 
4737 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4738 	/* If reset handler not implemented, continue; otherwise return */
4739 	if (r == -ENOSYS)
4740 		r = 0;
4741 	else
4742 		return r;
4743 
4744 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4745 	if (!amdgpu_sriov_vf(adev)) {
4746 
4747 		if (!need_full_reset)
4748 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4749 
4750 		if (!need_full_reset && amdgpu_gpu_recovery &&
4751 		    amdgpu_device_ip_check_soft_reset(adev)) {
4752 			amdgpu_device_ip_pre_soft_reset(adev);
4753 			r = amdgpu_device_ip_soft_reset(adev);
4754 			amdgpu_device_ip_post_soft_reset(adev);
4755 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4756 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4757 				need_full_reset = true;
4758 			}
4759 		}
4760 
4761 		if (need_full_reset)
4762 			r = amdgpu_device_ip_suspend(adev);
4763 		if (need_full_reset)
4764 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4765 		else
4766 			clear_bit(AMDGPU_NEED_FULL_RESET,
4767 				  &reset_context->flags);
4768 	}
4769 
4770 	return r;
4771 }
4772 
4773 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4774 {
4775 	int i;
4776 
4777 	lockdep_assert_held(&adev->reset_domain->sem);
4778 
4779 	for (i = 0; i < adev->num_regs; i++) {
4780 		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4781 		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4782 					     adev->reset_dump_reg_value[i]);
4783 	}
4784 
4785 	return 0;
4786 }
4787 
4788 #ifdef CONFIG_DEV_COREDUMP
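/*
 * Illustrative layout of the coredump produced by amdgpu_devcoredump_read()
 * below (all values are made-up examples):
 *
 *   **** AMDGPU Device Coredump ****
 *   kernel: 6.1.0
 *   module: amdgpu
 *   time: 1700000000.123456789
 *   process_name: app PID: 1234
 *   VRAM is lost due to GPU reset!
 *   AMDGPU register dumps:
 *   Offset:     Value:
 *   0x00001234: 0x0000abcd
 */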
4789 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4790 		size_t count, void *data, size_t datalen)
4791 {
4792 	struct drm_printer p;
4793 	struct amdgpu_device *adev = data;
4794 	struct drm_print_iterator iter;
4795 	int i;
4796 
4797 	iter.data = buffer;
4798 	iter.offset = 0;
4799 	iter.start = offset;
4800 	iter.remain = count;
4801 
4802 	p = drm_coredump_printer(&iter);
4803 
4804 	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4805 	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4806 	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4807 	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4808 	if (adev->reset_task_info.pid)
4809 		drm_printf(&p, "process_name: %s PID: %d\n",
4810 			   adev->reset_task_info.process_name,
4811 			   adev->reset_task_info.pid);
4812 
4813 	if (adev->reset_vram_lost)
4814 		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4815 	if (adev->num_regs) {
4816 		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
4817 
4818 		for (i = 0; i < adev->num_regs; i++)
4819 			drm_printf(&p, "0x%08x: 0x%08x\n",
4820 				   adev->reset_dump_reg_list[i],
4821 				   adev->reset_dump_reg_value[i]);
4822 	}
4823 
4824 	return count - iter.remain;
4825 }
4826 
4827 static void amdgpu_devcoredump_free(void *data)
4828 {
4829 }
4830 
4831 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4832 {
4833 	struct drm_device *dev = adev_to_drm(adev);
4834 
4835 	ktime_get_ts64(&adev->reset_time);
4836 	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4837 		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4838 }
4839 #endif
4840 
4841 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4842 			 struct amdgpu_reset_context *reset_context)
4843 {
4844 	struct amdgpu_device *tmp_adev = NULL;
4845 	bool need_full_reset, skip_hw_reset, vram_lost = false;
4846 	int r = 0;
	bool gpu_reset_for_dev_remove = false;
4848 
4849 	/* Try reset handler method first */
4850 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4851 				    reset_list);
4852 	amdgpu_reset_reg_dumps(tmp_adev);
4853 
4854 	reset_context->reset_device_list = device_list_handle;
4855 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4856 	/* If reset handler not implemented, continue; otherwise return */
4857 	if (r == -ENOSYS)
4858 		r = 0;
4859 	else
4860 		return r;
4861 
4862 	/* Reset handler not implemented, use the default method */
4863 	need_full_reset =
4864 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4865 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4866 
4867 	gpu_reset_for_dev_remove =
4868 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4869 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4870 
4871 	/*
4872 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4873 	 * to allow proper links negotiation in FW (within 1 sec)
4874 	 */
4875 	if (!skip_hw_reset && need_full_reset) {
4876 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4877 			/* For XGMI run all resets in parallel to speed up the process */
4878 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4879 				tmp_adev->gmc.xgmi.pending_reset = false;
4880 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4881 					r = -EALREADY;
4882 			} else
4883 				r = amdgpu_asic_reset(tmp_adev);
4884 
4885 			if (r) {
4886 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4887 					 r, adev_to_drm(tmp_adev)->unique);
4888 				break;
4889 			}
4890 		}
4891 
4892 		/* For XGMI wait for all resets to complete before proceed */
4893 		if (!r) {
4894 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4895 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4896 					flush_work(&tmp_adev->xgmi_reset_work);
4897 					r = tmp_adev->asic_reset_res;
4898 					if (r)
4899 						break;
4900 				}
4901 			}
4902 		}
4903 	}
4904 
4905 	if (!r && amdgpu_ras_intr_triggered()) {
4906 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4907 			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4908 			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4909 				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4910 		}
4911 
4912 		amdgpu_ras_intr_cleared();
4913 	}
4914 
4915 	/* Since the mode1 reset affects base ip blocks, the
4916 	 * phase1 ip blocks need to be resumed. Otherwise there
4917 	 * will be a BIOS signature error and the psp bootloader
4918 	 * can't load kdb on the next amdgpu install.
4919 	 */
4920 	if (gpu_reset_for_dev_remove) {
4921 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4922 			amdgpu_device_ip_resume_phase1(tmp_adev);
4923 
4924 		goto end;
4925 	}
4926 
4927 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4928 		if (need_full_reset) {
4929 			/* post card */
4930 			r = amdgpu_device_asic_init(tmp_adev);
4931 			if (r) {
4932 				dev_warn(tmp_adev->dev, "asic atom init failed!");
4933 			} else {
4934 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4935 				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4936 				if (r)
4937 					goto out;
4938 
4939 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4940 				if (r)
4941 					goto out;
4942 
4943 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4944 #ifdef CONFIG_DEV_COREDUMP
4945 				tmp_adev->reset_vram_lost = vram_lost;
4946 				memset(&tmp_adev->reset_task_info, 0,
4947 						sizeof(tmp_adev->reset_task_info));
4948 				if (reset_context->job && reset_context->job->vm)
4949 					tmp_adev->reset_task_info =
4950 						reset_context->job->vm->task_info;
4951 				amdgpu_reset_capture_coredumpm(tmp_adev);
4952 #endif
4953 				if (vram_lost) {
4954 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4955 					amdgpu_inc_vram_lost(tmp_adev);
4956 				}
4957 
4958 				r = amdgpu_device_fw_loading(tmp_adev);
4959 				if (r)
4960 					return r;
4961 
4962 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4963 				if (r)
4964 					goto out;
4965 
4966 				if (vram_lost)
4967 					amdgpu_device_fill_reset_magic(tmp_adev);
4968 
4969 				/*
4970 				 * Add this ASIC as tracked as reset was already
4971 				 * complete successfully.
4972 				 */
4973 				amdgpu_register_gpu_instance(tmp_adev);
4974 
4975 				if (!reset_context->hive &&
4976 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4977 					amdgpu_xgmi_add_device(tmp_adev);
4978 
4979 				r = amdgpu_device_ip_late_init(tmp_adev);
4980 				if (r)
4981 					goto out;
4982 
4983 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4984 
4985 				/*
4986 				 * The GPU enters bad state once faulty pages
4987 				 * by ECC has reached the threshold, and ras
4988 				 * recovery is scheduled next. So add one check
4989 				 * here to break recovery if it indeed exceeds
4990 				 * bad page threshold, and remind user to
4991 				 * retire this GPU or setting one bigger
4992 				 * bad_page_threshold value to fix this once
4993 				 * probing driver again.
4994 				 */
4995 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4996 					/* must succeed. */
4997 					amdgpu_ras_resume(tmp_adev);
4998 				} else {
4999 					r = -EINVAL;
5000 					goto out;
5001 				}
5002 
5003 				/* Update PSP FW topology after reset */
5004 				if (reset_context->hive &&
5005 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5006 					r = amdgpu_xgmi_update_topology(
5007 						reset_context->hive, tmp_adev);
5008 			}
5009 		}
5010 
5011 out:
5012 		if (!r) {
5013 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5014 			r = amdgpu_ib_ring_tests(tmp_adev);
5015 			if (r) {
5016 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5017 				need_full_reset = true;
5018 				r = -EAGAIN;
5019 				goto end;
5020 			}
5021 		}
5022 
5023 		if (!r)
5024 			r = amdgpu_device_recover_vram(tmp_adev);
5025 		else
5026 			tmp_adev->asic_reset_res = r;
5027 	}
5028 
5029 end:
5030 	if (need_full_reset)
5031 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5032 	else
5033 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5034 	return r;
5035 }
5036 
5037 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5038 {
5039 
5040 	switch (amdgpu_asic_reset_method(adev)) {
5041 	case AMD_RESET_METHOD_MODE1:
5042 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5043 		break;
5044 	case AMD_RESET_METHOD_MODE2:
5045 		adev->mp1_state = PP_MP1_STATE_RESET;
5046 		break;
5047 	default:
5048 		adev->mp1_state = PP_MP1_STATE_NONE;
5049 		break;
5050 	}
5051 }
5052 
5053 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5054 {
5055 	amdgpu_vf_error_trans_all(adev);
5056 	adev->mp1_state = PP_MP1_STATE_NONE;
5057 }
5058 
5059 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5060 {
5061 	struct pci_dev *p = NULL;
5062 
5063 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5064 			adev->pdev->bus->number, 1);
5065 	if (p) {
5066 		pm_runtime_enable(&(p->dev));
5067 		pm_runtime_resume(&(p->dev));
5068 	}
5069 
5070 	pci_dev_put(p);
5071 }
5072 
5073 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5074 {
5075 	enum amd_reset_method reset_method;
5076 	struct pci_dev *p = NULL;
5077 	u64 expires;
5078 
5079 	/*
5080 	 * For now, only BACO and mode1 reset are confirmed
5081 	 * to suffer the audio issue without proper suspended.
5082 	 */
5083 	reset_method = amdgpu_asic_reset_method(adev);
5084 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5085 	     (reset_method != AMD_RESET_METHOD_MODE1))
5086 		return -EINVAL;
5087 
5088 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5089 			adev->pdev->bus->number, 1);
5090 	if (!p)
5091 		return -ENODEV;
5092 
5093 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5094 	if (!expires)
5095 		/*
5096 		 * If we cannot get the audio device autosuspend delay,
5097 		 * a fixed 4S interval will be used. Considering 3S is
5098 		 * the audio controller default autosuspend delay setting.
5099 		 * 4S used here is guaranteed to cover that.
5100 		 */
5101 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5102 
5103 	while (!pm_runtime_status_suspended(&(p->dev))) {
5104 		if (!pm_runtime_suspend(&(p->dev)))
5105 			break;
5106 
5107 		if (expires < ktime_get_mono_fast_ns()) {
5108 			dev_warn(adev->dev, "failed to suspend display audio\n");
5109 			pci_dev_put(p);
5110 			/* TODO: abort the succeeding gpu reset? */
5111 			return -ETIMEDOUT;
5112 		}
5113 	}
5114 
5115 	pm_runtime_disable(&(p->dev));
5116 
5117 	pci_dev_put(p);
5118 	return 0;
5119 }
5120 
5121 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5122 {
5123 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5124 
5125 #if defined(CONFIG_DEBUG_FS)
5126 	if (!amdgpu_sriov_vf(adev))
5127 		cancel_work(&adev->reset_work);
5128 #endif
5129 
5130 	if (adev->kfd.dev)
5131 		cancel_work(&adev->kfd.reset_work);
5132 
5133 	if (amdgpu_sriov_vf(adev))
5134 		cancel_work(&adev->virt.flr_work);
5135 
5136 	if (con && adev->ras_enabled)
5137 		cancel_work(&con->recovery_work);
5138 
5139 }
5140 
5141 /**
5142  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5143  *
 * @adev: amdgpu_device pointer
 * @job: which job triggered the hang
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
5150  */
5151 
5152 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5153 			      struct amdgpu_job *job,
5154 			      struct amdgpu_reset_context *reset_context)
5155 {
5156 	struct list_head device_list, *device_list_handle =  NULL;
5157 	bool job_signaled = false;
5158 	struct amdgpu_hive_info *hive = NULL;
5159 	struct amdgpu_device *tmp_adev = NULL;
5160 	int i, r = 0;
5161 	bool need_emergency_restart = false;
5162 	bool audio_suspended = false;
5163 	bool gpu_reset_for_dev_remove = false;
5164 
5165 	gpu_reset_for_dev_remove =
5166 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5167 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5168 
5169 	/*
5170 	 * Special case: RAS triggered and full reset isn't supported
5171 	 */
5172 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5173 
5174 	/*
5175 	 * Flush RAM to disk so that after reboot
5176 	 * the user can read log and see why the system rebooted.
5177 	 */
5178 	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5179 		DRM_WARN("Emergency reboot.");
5180 
5181 		ksys_sync_helper();
5182 		emergency_restart();
5183 	}
5184 
5185 	dev_info(adev->dev, "GPU %s begin!\n",
5186 		need_emergency_restart ? "jobs stop":"reset");
5187 
5188 	if (!amdgpu_sriov_vf(adev))
5189 		hive = amdgpu_get_xgmi_hive(adev);
5190 	if (hive)
5191 		mutex_lock(&hive->hive_lock);
5192 
5193 	reset_context->job = job;
5194 	reset_context->hive = hive;
5195 	/*
5196 	 * Build list of devices to reset.
5197 	 * In case we are in XGMI hive mode, resort the device list
5198 	 * to put adev in the 1st position.
5199 	 */
5200 	INIT_LIST_HEAD(&device_list);
5201 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5202 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5203 			list_add_tail(&tmp_adev->reset_list, &device_list);
5204 			if (gpu_reset_for_dev_remove && adev->shutdown)
5205 				tmp_adev->shutdown = true;
5206 		}
5207 		if (!list_is_first(&adev->reset_list, &device_list))
5208 			list_rotate_to_front(&adev->reset_list, &device_list);
5209 		device_list_handle = &device_list;
5210 	} else {
5211 		list_add_tail(&adev->reset_list, &device_list);
5212 		device_list_handle = &device_list;
5213 	}
5214 
5215 	/* We need to lock reset domain only once both for XGMI and single device */
5216 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5217 				    reset_list);
5218 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5219 
5220 	/* block all schedulers and reset given job's ring */
5221 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5222 
5223 		amdgpu_device_set_mp1_state(tmp_adev);
5224 
5225 		/*
5226 		 * Try to put the audio codec into suspend state
5227 		 * before gpu reset started.
5228 		 *
5229 		 * Due to the power domain of the graphics device
5230 		 * is shared with AZ power domain. Without this,
5231 		 * we may change the audio hardware from behind
5232 		 * the audio driver's back. That will trigger
5233 		 * some audio codec errors.
5234 		 */
5235 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5236 			audio_suspended = true;
5237 
5238 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5239 
5240 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5241 
5242 		if (!amdgpu_sriov_vf(tmp_adev))
5243 			amdgpu_amdkfd_pre_reset(tmp_adev);
5244 
5245 		/*
5246 		 * Mark these ASICs to be reseted as untracked first
5247 		 * And add them back after reset completed
5248 		 */
5249 		amdgpu_unregister_gpu_instance(tmp_adev);
5250 
5251 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5252 
5253 		/* disable ras on ALL IPs */
5254 		if (!need_emergency_restart &&
5255 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5256 			amdgpu_ras_suspend(tmp_adev);
5257 
5258 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5259 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5260 
5261 			if (!ring || !ring->sched.thread)
5262 				continue;
5263 
5264 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5265 
5266 			if (need_emergency_restart)
5267 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5268 		}
5269 		atomic_inc(&tmp_adev->gpu_reset_counter);
5270 	}
5271 
5272 	if (need_emergency_restart)
5273 		goto skip_sched_resume;
5274 
5275 	/*
5276 	 * Must check guilty signal here since after this point all old
5277 	 * HW fences are force signaled.
5278 	 *
5279 	 * job->base holds a reference to parent fence
5280 	 */
5281 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5282 		job_signaled = true;
5283 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5284 		goto skip_hw_reset;
5285 	}
5286 
5287 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5288 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5289 		if (gpu_reset_for_dev_remove) {
			/* Workaround for ASICs that need to disable SMC first */
5291 			amdgpu_device_smu_fini_early(tmp_adev);
5292 		}
5293 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/* TODO: Should we stop? */
5295 		if (r) {
5296 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5297 				  r, adev_to_drm(tmp_adev)->unique);
5298 			tmp_adev->asic_reset_res = r;
5299 		}
5300 
5301 		/*
5302 		 * Drop all pending non scheduler resets. Scheduler resets
5303 		 * were already dropped during drm_sched_stop
5304 		 */
5305 		amdgpu_device_stop_pending_resets(tmp_adev);
5306 	}
5307 
5308 	/* Actual ASIC resets if needed. */
5309 	/* The host driver will handle the XGMI hive reset for SRIOV. */
5310 	if (amdgpu_sriov_vf(adev)) {
5311 		r = amdgpu_device_reset_sriov(adev, !job);
5312 		if (r)
5313 			adev->asic_reset_res = r;
5314 
5315 		/* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
5316 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5317 			amdgpu_ras_resume(adev);
5318 	} else {
5319 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5320 		if (r == -EAGAIN)
5321 			goto retry;
5322 
5323 		if (!r && gpu_reset_for_dev_remove)
5324 			goto recover_end;
5325 	}
5326 
5327 skip_hw_reset:
5328 
5329 	/* Post ASIC reset for all devs. */
5330 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5331 
5332 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5333 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5334 
5335 			if (!ring || !ring->sched.thread)
5336 				continue;
5337 
5338 			drm_sched_start(&ring->sched, true);
5339 		}
5340 
5341 		if (tmp_adev->enable_mes && tmp_adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5342 			amdgpu_mes_self_test(tmp_adev);
5343 
5344 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5345 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5346 		}
5347 
5348 		if (tmp_adev->asic_reset_res)
5349 			r = tmp_adev->asic_reset_res;
5350 
5351 		tmp_adev->asic_reset_res = 0;
5352 
5353 		if (r) {
5354 			/* bad news, how do we tell userspace? */
5355 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5356 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5357 		} else {
5358 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5359 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5360 				DRM_WARN("smart shift update failed\n");
5361 		}
5362 	}
5363 
5364 skip_sched_resume:
5365 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5366 		/* unlock kfd: SRIOV would do it separately */
5367 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5368 			amdgpu_amdkfd_post_reset(tmp_adev);
5369 
5370 		/* kfd_post_reset will do nothing if the kfd device is not initialized;
5371 		 * we need to bring up kfd here if it was not initialized before.
5372 		 */
5373 		if (!tmp_adev->kfd.init_complete)
5374 			amdgpu_amdkfd_device_init(tmp_adev);
5375 
5376 		if (audio_suspended)
5377 			amdgpu_device_resume_display_audio(tmp_adev);
5378 
5379 		amdgpu_device_unset_mp1_state(tmp_adev);
5380 
5381 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5382 	}
5383 
5384 recover_end:
5385 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5386 					    reset_list);
5387 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5388 
5389 	if (hive) {
5390 		mutex_unlock(&hive->hive_lock);
5391 		amdgpu_put_xgmi_hive(hive);
5392 	}
5393 
5394 	if (r)
5395 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5396 
5397 	atomic_set(&adev->reset_domain->reset_res, r);
5398 	return r;
5399 }
5400 
5401 /**
5402  * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5403  *
5404  * @adev: amdgpu_device pointer
5405  *
5406  * Fetches and stores in the driver the PCIe capabilities (gen speed
5407  * and lanes) of the slot the device is in. Handles APUs and
5408  * virtualized environments where PCIe config space may not be available.
5409  */
5410 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5411 {
5412 	struct pci_dev *pdev;
5413 	enum pci_bus_speed speed_cap, platform_speed_cap;
5414 	enum pcie_link_width platform_link_width;
5415 
5416 	if (amdgpu_pcie_gen_cap)
5417 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5418 
5419 	if (amdgpu_pcie_lane_cap)
5420 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5421 
5422 	/* covers APUs as well */
5423 	if (pci_is_root_bus(adev->pdev->bus)) {
5424 		if (adev->pm.pcie_gen_mask == 0)
5425 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5426 		if (adev->pm.pcie_mlw_mask == 0)
5427 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5428 		return;
5429 	}
5430 
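	/* If both masks are already known (e.g. overridden via module
	 * parameters), there is nothing left to query.
	 */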
5431 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5432 		return;
5433 
5434 	pcie_bandwidth_available(adev->pdev, NULL,
5435 				 &platform_speed_cap, &platform_link_width);
5436 
5437 	if (adev->pm.pcie_gen_mask == 0) {
5438 		/* asic caps */
5439 		pdev = adev->pdev;
5440 		speed_cap = pcie_get_speed_cap(pdev);
5441 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5442 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5443 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5444 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5445 		} else {
5446 			if (speed_cap == PCIE_SPEED_32_0GT)
5447 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5448 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5449 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5450 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5451 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5452 			else if (speed_cap == PCIE_SPEED_16_0GT)
5453 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5454 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5455 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5456 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5457 			else if (speed_cap == PCIE_SPEED_8_0GT)
5458 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5459 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5460 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5461 			else if (speed_cap == PCIE_SPEED_5_0GT)
5462 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5463 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5464 			else
5465 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5466 		}
5467 		/* platform caps */
5468 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5469 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5470 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5471 		} else {
5472 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5473 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5474 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5475 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5476 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5477 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5478 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5479 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5480 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5481 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5482 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5483 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5484 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5485 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5486 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5487 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5488 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5489 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5490 			else
5491 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5492 
5493 		}
5494 	}
5495 	if (adev->pm.pcie_mlw_mask == 0) {
5496 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5497 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5498 		} else {
5499 			switch (platform_link_width) {
5500 			case PCIE_LNK_X32:
5501 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5502 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5503 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5504 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5505 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5506 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5507 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5508 				break;
5509 			case PCIE_LNK_X16:
5510 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5511 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5512 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5513 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5514 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5515 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5516 				break;
5517 			case PCIE_LNK_X12:
5518 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5519 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5520 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5521 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5522 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5523 				break;
5524 			case PCIE_LNK_X8:
5525 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5526 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5527 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5528 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5529 				break;
5530 			case PCIE_LNK_X4:
5531 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5532 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5533 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5534 				break;
5535 			case PCIE_LNK_X2:
5536 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5537 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5538 				break;
5539 			case PCIE_LNK_X1:
5540 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5541 				break;
5542 			default:
5543 				break;
5544 			}
5545 		}
5546 	}
5547 }
5548 
5549 /**
5550  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5551  *
5552  * @adev: amdgpu_device pointer
5553  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5554  *
5555  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5556  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5557  * @peer_adev.
5558  */
5559 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5560 				      struct amdgpu_device *peer_adev)
5561 {
5562 #ifdef CONFIG_HSA_AMD_P2P
5563 	uint64_t address_mask = peer_adev->dev->dma_mask ?
5564 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5565 	resource_size_t aper_limit =
5566 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
5567 	bool p2p_access =
5568 		!adev->gmc.xgmi.connected_to_cpu &&
5569 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5570 
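	/* Peer access additionally requires the whole VRAM to be CPU-visible
	 * through the BAR (large BAR) and the aperture to fit within the
	 * peer's DMA mask.
	 */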
5571 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5572 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5573 		!(adev->gmc.aper_base & address_mask ||
5574 		  aper_limit & address_mask));
5575 #else
5576 	return false;
5577 #endif
5578 }
5579 
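/**
 * amdgpu_device_baco_enter - enter the BACO low power state
 *
 * @dev: drm_device pointer
 *
 * Request BACO entry through the DPM interface, disabling the doorbell
 * interrupt first when RAS is enabled. Returns -ENOTSUPP if the device
 * does not support BACO, otherwise 0 on success or a negative error code.
 */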
5580 int amdgpu_device_baco_enter(struct drm_device *dev)
5581 {
5582 	struct amdgpu_device *adev = drm_to_adev(dev);
5583 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5584 
5585 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5586 		return -ENOTSUPP;
5587 
5588 	if (ras && adev->ras_enabled &&
5589 	    adev->nbio.funcs->enable_doorbell_interrupt)
5590 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5591 
5592 	return amdgpu_dpm_baco_enter(adev);
5593 }
5594 
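/**
 * amdgpu_device_baco_exit - exit the BACO low power state
 *
 * @dev: drm_device pointer
 *
 * Request BACO exit through the DPM interface and re-enable the doorbell
 * interrupt if RAS is enabled. For passthrough setups any pending doorbell
 * interrupt is also cleared. Returns -ENOTSUPP if BACO is not supported,
 * otherwise 0 on success or a negative error code.
 */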
5595 int amdgpu_device_baco_exit(struct drm_device *dev)
5596 {
5597 	struct amdgpu_device *adev = drm_to_adev(dev);
5598 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5599 	int ret = 0;
5600 
5601 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5602 		return -ENOTSUPP;
5603 
5604 	ret = amdgpu_dpm_baco_exit(adev);
5605 	if (ret)
5606 		return ret;
5607 
5608 	if (ras && adev->ras_enabled &&
5609 	    adev->nbio.funcs->enable_doorbell_interrupt)
5610 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5611 
5612 	if (amdgpu_passthrough(adev) &&
5613 	    adev->nbio.funcs->clear_doorbell_interrupt)
5614 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
5615 
5616 	return 0;
5617 }
5618 
5619 /**
5620  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5621  * @pdev: PCI device struct
5622  * @state: PCI channel state
5623  *
5624  * Description: Called when a PCI error is detected.
5625  *
5626  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5627  */
5628 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5629 {
5630 	struct drm_device *dev = pci_get_drvdata(pdev);
5631 	struct amdgpu_device *adev = drm_to_adev(dev);
5632 	int i;
5633 
5634 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5635 
5636 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
5637 		DRM_WARN("No support for XGMI hive yet...");
5638 		return PCI_ERS_RESULT_DISCONNECT;
5639 	}
5640 
5641 	adev->pci_channel_state = state;
5642 
5643 	switch (state) {
5644 	case pci_channel_io_normal:
5645 		return PCI_ERS_RESULT_CAN_RECOVER;
5646 	/* Fatal error, prepare for slot reset */
5647 	case pci_channel_io_frozen:
5648 		/*
5649 		 * Locking adev->reset_domain->sem will prevent any external access
5650 		 * to GPU during PCI error recovery
5651 		 */
5652 		amdgpu_device_lock_reset_domain(adev->reset_domain);
5653 		amdgpu_device_set_mp1_state(adev);
5654 
5655 		/*
5656 		 * Block any work scheduling as we do for regular GPU reset
5657 		 * for the duration of the recovery
5658 		 */
5659 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5660 			struct amdgpu_ring *ring = adev->rings[i];
5661 
5662 			if (!ring || !ring->sched.thread)
5663 				continue;
5664 
5665 			drm_sched_stop(&ring->sched, NULL);
5666 		}
5667 		atomic_inc(&adev->gpu_reset_counter);
5668 		return PCI_ERS_RESULT_NEED_RESET;
5669 	case pci_channel_io_perm_failure:
5670 		/* Permanent error, prepare for device removal */
5671 		return PCI_ERS_RESULT_DISCONNECT;
5672 	}
5673 
5674 	return PCI_ERS_RESULT_NEED_RESET;
5675 }
5676 
5677 /**
5678  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5679  * @pdev: pointer to PCI device
5680  */
5681 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5682 {
5684 	DRM_INFO("PCI error: mmio enabled callback!!\n");
5685 
5686 	/* TODO - dump whatever for debugging purposes */
5687 
5688 	/* This is called only if amdgpu_pci_error_detected returns
5689 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5690 	 * works, so there is no need to reset the slot.
5691 	 */
5692 
5693 	return PCI_ERS_RESULT_RECOVERED;
5694 }
5695 
5696 /**
5697  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5698  * @pdev: PCI device struct
5699  *
5700  * Description: This routine is called by the pci error recovery
5701  * code after the PCI slot has been reset, just before we
5702  * should resume normal operations.
5703  */
5704 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5705 {
5706 	struct drm_device *dev = pci_get_drvdata(pdev);
5707 	struct amdgpu_device *adev = drm_to_adev(dev);
5708 	int r, i;
5709 	struct amdgpu_reset_context reset_context;
5710 	u32 memsize;
5711 	struct list_head device_list;
5712 
5713 	DRM_INFO("PCI error: slot reset callback!!\n");
5714 
5715 	memset(&reset_context, 0, sizeof(reset_context));
5716 
5717 	INIT_LIST_HEAD(&device_list);
5718 	list_add_tail(&adev->reset_list, &device_list);
5719 
5720 	/* wait for asic to come out of reset */
5721 	msleep(500);
5722 
5723 	/* Restore the PCI config space */
5724 	amdgpu_device_load_pci_state(pdev);
5725 
5726 	/* confirm the ASIC came out of reset */
5727 	for (i = 0; i < adev->usec_timeout; i++) {
5728 		memsize = amdgpu_asic_get_config_memsize(adev);
5729 
5730 		if (memsize != 0xffffffff)
5731 			break;
5732 		udelay(1);
5733 	}
5734 	if (memsize == 0xffffffff) {
5735 		r = -ETIME;
5736 		goto out;
5737 	}
5738 
5739 	reset_context.method = AMD_RESET_METHOD_NONE;
5740 	reset_context.reset_req_dev = adev;
5741 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5742 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5743 
5744 	adev->no_hw_access = true;
5745 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5746 	adev->no_hw_access = false;
5747 	if (r)
5748 		goto out;
5749 
5750 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
5751 
5752 out:
5753 	if (!r) {
5754 		if (amdgpu_device_cache_pci_state(adev->pdev))
5755 			pci_restore_state(adev->pdev);
5756 
5757 		DRM_INFO("PCIe error recovery succeeded\n");
5758 	} else {
5759 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
5760 		amdgpu_device_unset_mp1_state(adev);
5761 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
5762 	}
5763 
5764 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5765 }
5766 
5767 /**
5768  * amdgpu_pci_resume() - resume normal ops after PCI reset
5769  * @pdev: pointer to PCI device
5770  *
5771  * Called when the error recovery driver tells us that it is
5772  * OK to resume normal operation.
5773  */
5774 void amdgpu_pci_resume(struct pci_dev *pdev)
5775 {
5776 	struct drm_device *dev = pci_get_drvdata(pdev);
5777 	struct amdgpu_device *adev = drm_to_adev(dev);
5778 	int i;
5779
5781 	DRM_INFO("PCI error: resume callback!!\n");
5782 
5783 	/* Only continue execution for the case of pci_channel_io_frozen */
5784 	if (adev->pci_channel_state != pci_channel_io_frozen)
5785 		return;
5786 
5787 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5788 		struct amdgpu_ring *ring = adev->rings[i];
5789 
5790 		if (!ring || !ring->sched.thread)
5791 			continue;
5792 
5793 		drm_sched_start(&ring->sched, true);
5794 	}
5795 
5796 	amdgpu_device_unset_mp1_state(adev);
5797 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
5798 }
5799 
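/**
 * amdgpu_device_cache_pci_state - save the PCI configuration space
 *
 * @pdev: PCI device struct
 *
 * Save the PCI configuration space of the device and stash it in
 * adev->pci_state so that it can be restored after a GPU reset.
 * Returns true on success, false otherwise.
 */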
5800 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5801 {
5802 	struct drm_device *dev = pci_get_drvdata(pdev);
5803 	struct amdgpu_device *adev = drm_to_adev(dev);
5804 	int r;
5805 
5806 	r = pci_save_state(pdev);
5807 	if (!r) {
5808 		kfree(adev->pci_state);
5809 
5810 		adev->pci_state = pci_store_saved_state(pdev);
5811 
5812 		if (!adev->pci_state) {
5813 			DRM_ERROR("Failed to store PCI saved state");
5814 			return false;
5815 		}
5816 	} else {
5817 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
5818 		return false;
5819 	}
5820 
5821 	return true;
5822 }
5823 
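/**
 * amdgpu_device_load_pci_state - restore the cached PCI configuration space
 *
 * @pdev: PCI device struct
 *
 * Load the PCI configuration space previously saved by
 * amdgpu_device_cache_pci_state() back into the device and restore it.
 * Returns true on success, false if no cached state exists or the
 * load failed.
 */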
5824 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5825 {
5826 	struct drm_device *dev = pci_get_drvdata(pdev);
5827 	struct amdgpu_device *adev = drm_to_adev(dev);
5828 	int r;
5829 
5830 	if (!adev->pci_state)
5831 		return false;
5832 
5833 	r = pci_load_saved_state(pdev, adev->pci_state);
5834 
5835 	if (!r) {
5836 		pci_restore_state(pdev);
5837 	} else {
5838 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
5839 		return false;
5840 	}
5841 
5842 	return true;
5843 }
5844 
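/**
 * amdgpu_device_flush_hdp - flush the HDP cache
 *
 * @adev: amdgpu_device pointer
 * @ring: optional ring to emit the flush on
 *
 * Flush the HDP cache, either by emitting a flush packet on @ring when the
 * ring supports it or through the ASIC callback. This is a no-op on
 * bare-metal APUs and on devices whose memory is directly connected to
 * the CPU.
 */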
5845 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5846 		struct amdgpu_ring *ring)
5847 {
5848 #ifdef CONFIG_X86_64
5849 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5850 		return;
5851 #endif
5852 	if (adev->gmc.xgmi.connected_to_cpu)
5853 		return;
5854 
5855 	if (ring && ring->funcs->emit_hdp_flush)
5856 		amdgpu_ring_emit_hdp_flush(ring);
5857 	else
5858 		amdgpu_asic_flush_hdp(adev, ring);
5859 }
5860 
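/**
 * amdgpu_device_invalidate_hdp - invalidate the HDP cache
 *
 * @adev: amdgpu_device pointer
 * @ring: ring the invalidation is associated with (may be NULL)
 *
 * Invalidate the HDP cache through the ASIC callback. Like the flush, this
 * is a no-op on bare-metal APUs and on devices whose memory is directly
 * connected to the CPU.
 */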
5861 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5862 		struct amdgpu_ring *ring)
5863 {
5864 #ifdef CONFIG_X86_64
5865 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5866 		return;
5867 #endif
5868 	if (adev->gmc.xgmi.connected_to_cpu)
5869 		return;
5870 
5871 	amdgpu_asic_invalidate_hdp(adev, ring);
5872 }
5873 
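/**
 * amdgpu_in_reset - check whether a GPU reset is in progress
 *
 * @adev: amdgpu_device pointer
 *
 * Returns non-zero while the reset domain that @adev belongs to is
 * performing a GPU reset.
 */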
5874 int amdgpu_in_reset(struct amdgpu_device *adev)
5875 {
5876 	return atomic_read(&adev->reset_domain->in_gpu_reset);
5877 }
5878 
5879 /**
5880  * amdgpu_device_halt() - bring hardware to some kind of halt state
5881  *
5882  * @adev: amdgpu_device pointer
5883  *
5884  * Bring the hardware to some kind of halt state so that no one can touch it
5885  * any more. This helps to preserve the error context when an error occurs.
5886  * Compared to a simple hang, the system stays stable at least for SSH
5887  * access, so it should be trivial to inspect the hardware state and
5888  * see what is going on. Implemented as follows:
5889  *
5890  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
5891  *    clears all CPU mappings to the device and disallows remappings through page faults
5892  * 2. amdgpu_irq_disable_all() disables all interrupts
5893  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5894  * 4. set adev->no_hw_access to avoid potential crashes after step 5
5895  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5896  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5897  *    flush any in-flight DMA operations
5898  */
5899 void amdgpu_device_halt(struct amdgpu_device *adev)
5900 {
5901 	struct pci_dev *pdev = adev->pdev;
5902 	struct drm_device *ddev = adev_to_drm(adev);
5903 
5904 	drm_dev_unplug(ddev);
5905 
5906 	amdgpu_irq_disable_all(adev);
5907 
5908 	amdgpu_fence_driver_hw_fini(adev);
5909 
5910 	adev->no_hw_access = true;
5911 
5912 	amdgpu_device_unmap_mmio(adev);
5913 
5914 	pci_disable_device(pdev);
5915 	pci_wait_for_pending_transaction(pdev);
5916 }
5917 
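/**
 * amdgpu_device_pcie_port_rreg - read a PCIE port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the register
 *
 * Read a PCIE port register through the NBIO index/data register pair,
 * serialized by the pcie_idx_lock spinlock. Returns the 32-bit value.
 */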
5918 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5919 				u32 reg)
5920 {
5921 	unsigned long flags, address, data;
5922 	u32 r;
5923 
5924 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5925 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5926 
5927 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5928 	WREG32(address, reg * 4);
5929 	(void)RREG32(address);
5930 	r = RREG32(data);
5931 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5932 	return r;
5933 }
5934 
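/**
 * amdgpu_device_pcie_port_wreg - write a PCIE port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the register
 * @v: value to write
 *
 * Write a PCIE port register through the NBIO index/data register pair,
 * serialized by the pcie_idx_lock spinlock. The trailing read flushes
 * the write.
 */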
5935 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5936 				u32 reg, u32 v)
5937 {
5938 	unsigned long flags, address, data;
5939 
5940 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5941 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5942 
5943 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5944 	WREG32(address, reg * 4);
5945 	(void)RREG32(address);
5946 	WREG32(data, v);
5947 	(void)RREG32(data);
5948 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5949 }
5950 
5951 /**
5952  * amdgpu_device_switch_gang - switch to a new gang
5953  * @adev: amdgpu_device pointer
5954  * @gang: the gang to switch to
5955  *
5956  * Try to switch to a new gang.
5957  * Returns: NULL if we switched to the new gang or a reference to the current
5958  * gang leader.
5959  */
5960 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5961 					    struct dma_fence *gang)
5962 {
5963 	struct dma_fence *old = NULL;
5964 
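	/*
	 * Keep re-reading the current gang leader until it can be replaced
	 * atomically: bail out early if it is already @gang, or hand back a
	 * reference to the old leader if it has not signaled yet.
	 */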
5965 	do {
5966 		dma_fence_put(old);
5967 		rcu_read_lock();
5968 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
5969 		rcu_read_unlock();
5970 
5971 		if (old == gang)
5972 			break;
5973 
5974 		if (!dma_fence_is_signaled(old))
5975 			return old;
5976 
5977 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5978 			 old, gang) != old);
5979 
5980 	dma_fence_put(old);
5981 	return NULL;
5982 }
5983 
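/**
 * amdgpu_device_has_display_hardware - check whether the ASIC has display hardware
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if the ASIC provides display hardware (DCE/DCN), false for
 * display-less chips. For IP-discovery based ASICs this checks the DCE HWIP
 * version and whether the DMU has been harvested.
 */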
5984 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5985 {
5986 	switch (adev->asic_type) {
5987 #ifdef CONFIG_DRM_AMDGPU_SI
5988 	case CHIP_HAINAN:
5989 #endif
5990 	case CHIP_TOPAZ:
5991 		/* chips with no display hardware */
5992 		return false;
5993 #ifdef CONFIG_DRM_AMDGPU_SI
5994 	case CHIP_TAHITI:
5995 	case CHIP_PITCAIRN:
5996 	case CHIP_VERDE:
5997 	case CHIP_OLAND:
5998 #endif
5999 #ifdef CONFIG_DRM_AMDGPU_CIK
6000 	case CHIP_BONAIRE:
6001 	case CHIP_HAWAII:
6002 	case CHIP_KAVERI:
6003 	case CHIP_KABINI:
6004 	case CHIP_MULLINS:
6005 #endif
6006 	case CHIP_TONGA:
6007 	case CHIP_FIJI:
6008 	case CHIP_POLARIS10:
6009 	case CHIP_POLARIS11:
6010 	case CHIP_POLARIS12:
6011 	case CHIP_VEGAM:
6012 	case CHIP_CARRIZO:
6013 	case CHIP_STONEY:
6014 		/* chips with display hardware */
6015 		return true;
6016 	default:
6017 		/* IP discovery */
6018 		if (!adev->ip_versions[DCE_HWIP][0] ||
6019 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6020 			return false;
6021 		return true;
6022 	}
6023 }
6024