1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 #include "amdgpu_fru_eeprom.h"
68 #include "amdgpu_reset.h"
69 
70 #include <linux/suspend.h>
71 #include <drm/task_barrier.h>
72 #include <linux/pm_runtime.h>
73 
74 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
77 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
78 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
82 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
83 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
85 
86 #define AMDGPU_RESUME_MS		2000
87 
88 const char *amdgpu_asic_name[] = {
89 	"TAHITI",
90 	"PITCAIRN",
91 	"VERDE",
92 	"OLAND",
93 	"HAINAN",
94 	"BONAIRE",
95 	"KAVERI",
96 	"KABINI",
97 	"HAWAII",
98 	"MULLINS",
99 	"TOPAZ",
100 	"TONGA",
101 	"FIJI",
102 	"CARRIZO",
103 	"STONEY",
104 	"POLARIS10",
105 	"POLARIS11",
106 	"POLARIS12",
107 	"VEGAM",
108 	"VEGA10",
109 	"VEGA12",
110 	"VEGA20",
111 	"RAVEN",
112 	"ARCTURUS",
113 	"RENOIR",
114 	"ALDEBARAN",
115 	"NAVI10",
116 	"NAVI14",
117 	"NAVI12",
118 	"SIENNA_CICHLID",
119 	"NAVY_FLOUNDER",
120 	"VANGOGH",
121 	"DIMGREY_CAVEFISH",
122 	"LAST",
123 };
124 
125 /**
126  * DOC: pcie_replay_count
127  *
128  * The amdgpu driver provides a sysfs API for reporting the total number
129  * of PCIe replays (NAKs).
130  * The file pcie_replay_count is used for this and returns the total
131  * number of replays as a sum of the NAKs generated and the NAKs received.
132  */
133 
134 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
135 		struct device_attribute *attr, char *buf)
136 {
137 	struct drm_device *ddev = dev_get_drvdata(dev);
138 	struct amdgpu_device *adev = drm_to_adev(ddev);
139 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
140 
141 	return sysfs_emit(buf, "%llu\n", cnt);
142 }
143 
144 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
145 		amdgpu_device_get_pcie_replay_count, NULL);
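
/*
 * Usage sketch (illustrative; assumes card0 corresponds to this amdgpu
 * device in the standard DRM sysfs layout):
 *
 *	$ cat /sys/class/drm/card0/device/pcie_replay_count
 *	0
 */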
146 
147 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
148 
149 /**
150  * DOC: product_name
151  *
152  * The amdgpu driver provides a sysfs API for reporting the product name
153  * for the device.
154  * The file product_name is used for this and returns the product name
155  * as returned from the FRU.
156  * NOTE: This is only available for certain server cards
157  */
158 
159 static ssize_t amdgpu_device_get_product_name(struct device *dev,
160 		struct device_attribute *attr, char *buf)
161 {
162 	struct drm_device *ddev = dev_get_drvdata(dev);
163 	struct amdgpu_device *adev = drm_to_adev(ddev);
164 
165 	return sysfs_emit(buf, "%s\n", adev->product_name);
166 }
167 
168 static DEVICE_ATTR(product_name, S_IRUGO,
169 		amdgpu_device_get_product_name, NULL);
170 
171 /**
172  * DOC: product_number
173  *
174  * The amdgpu driver provides a sysfs API for reporting the part number
175  * for the device.
176  * The file product_number is used for this and returns the part number
177  * as returned from the FRU.
178  * NOTE: This is only available for certain server cards
179  */
180 
181 static ssize_t amdgpu_device_get_product_number(struct device *dev,
182 		struct device_attribute *attr, char *buf)
183 {
184 	struct drm_device *ddev = dev_get_drvdata(dev);
185 	struct amdgpu_device *adev = drm_to_adev(ddev);
186 
187 	return sysfs_emit(buf, "%s\n", adev->product_number);
188 }
189 
190 static DEVICE_ATTR(product_number, S_IRUGO,
191 		amdgpu_device_get_product_number, NULL);
192 
193 /**
194  * DOC: serial_number
195  *
196  * The amdgpu driver provides a sysfs API for reporting the serial number
197  * for the device.
198  * The file serial_number is used for this and returns the serial number
199  * as returned from the FRU.
200  * NOTE: This is only available for certain server cards
201  */
202 
203 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
204 		struct device_attribute *attr, char *buf)
205 {
206 	struct drm_device *ddev = dev_get_drvdata(dev);
207 	struct amdgpu_device *adev = drm_to_adev(ddev);
208 
209 	return sysfs_emit(buf, "%s\n", adev->serial);
210 }
211 
212 static DEVICE_ATTR(serial_number, S_IRUGO,
213 		amdgpu_device_get_serial_number, NULL);
214 
215 /**
216  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
217  *
218  * @dev: drm_device pointer
219  *
220  * Returns true if the device is a dGPU with ATPX power control,
221  * otherwise return false.
222  */
223 bool amdgpu_device_supports_px(struct drm_device *dev)
224 {
225 	struct amdgpu_device *adev = drm_to_adev(dev);
226 
227 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
228 		return true;
229 	return false;
230 }
231 
232 /**
233  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
234  *
235  * @dev: drm_device pointer
236  *
237  * Returns true if the device is a dGPU with ACPI power control,
238  * otherwise return false.
239  */
240 bool amdgpu_device_supports_boco(struct drm_device *dev)
241 {
242 	struct amdgpu_device *adev = drm_to_adev(dev);
243 
244 	if (adev->has_pr3 ||
245 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
246 		return true;
247 	return false;
248 }
249 
250 /**
251  * amdgpu_device_supports_baco - Does the device support BACO
252  *
253  * @dev: drm_device pointer
254  *
255  * Returns true if the device supports BACO,
256  * otherwise return false.
257  */
258 bool amdgpu_device_supports_baco(struct drm_device *dev)
259 {
260 	struct amdgpu_device *adev = drm_to_adev(dev);
261 
262 	return amdgpu_asic_supports_baco(adev);
263 }
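
/*
 * Note on the three helpers above: PX means the dGPU is powered up and down
 * through the ATPX ACPI method, BOCO means it is handled through standard
 * ACPI power resources (e.g. _PR3), and BACO is an asic-internal mode where
 * the chip is powered down while its PCIe link stays active.  Runtime PM uses
 * these checks to pick the appropriate power-off method.
 */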
264 
265 /*
266  * VRAM access helper functions
267  */
268 
269 /**
270  * amdgpu_device_vram_access - read/write a buffer in vram
271  *
272  * @adev: amdgpu_device pointer
273  * @pos: offset of the buffer in vram
274  * @buf: virtual address of the buffer in system memory
275  * @size: read/write size in bytes, @buf must be at least @size bytes long
276  * @write: true - write to vram, otherwise - read from vram
277  */
278 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
279 			       uint32_t *buf, size_t size, bool write)
280 {
281 	unsigned long flags;
282 	uint32_t hi = ~0;
283 	uint64_t last;
284 
285 
286 #ifdef CONFIG_64BIT
287 	last = min(pos + size, adev->gmc.visible_vram_size);
288 	if (last > pos) {
289 		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
290 		size_t count = last - pos;
291 
292 		if (write) {
293 			memcpy_toio(addr, buf, count);
294 			mb();
295 			amdgpu_asic_flush_hdp(adev, NULL);
296 		} else {
297 			amdgpu_asic_invalidate_hdp(adev, NULL);
298 			mb();
299 			memcpy_fromio(buf, addr, count);
300 		}
301 
302 		if (count == size)
303 			return;
304 
305 		pos += count;
306 		buf += count / 4;
307 		size -= count;
308 	}
309 #endif
310 
311 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
312 	for (last = pos + size; pos < last; pos += 4) {
313 		uint32_t tmp = pos >> 31;
314 
315 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
316 		if (tmp != hi) {
317 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
318 			hi = tmp;
319 		}
320 		if (write)
321 			WREG32_NO_KIQ(mmMM_DATA, *buf++);
322 		else
323 			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
324 	}
325 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
326 }
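
/*
 * Usage sketch (illustrative): read one dword from the start of VRAM into a
 * local buffer; the helper picks the CPU-visible aperture or the
 * MM_INDEX/MM_DATA fallback automatically:
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0, &val, sizeof(val), false);
 */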
327 
328 /*
329  * register access helper functions.
330  */
331 
332 /* Check if hw access should be skipped because of hotplug or device error */
333 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
334 {
335 	if (adev->in_pci_err_recovery)
336 		return true;
337 
338 #ifdef CONFIG_LOCKDEP
339 	/*
340 	 * This is a bit complicated to understand, so worth a comment. What we assert
341 	 * here is that the GPU reset is not running on another thread in parallel.
342 	 *
343 	 * For this we trylock the read side of the reset semaphore, if that succeeds
344 	 * we know that the reset is not running in parallel.
345 	 *
346 	 * If the trylock fails we assert that we are either already holding the read
347 	 * side of the lock or are the reset thread itself and hold the write side of
348 	 * the lock.
349 	 */
350 	if (in_task()) {
351 		if (down_read_trylock(&adev->reset_sem))
352 			up_read(&adev->reset_sem);
353 		else
354 			lockdep_assert_held(&adev->reset_sem);
355 	}
356 #endif
357 	return false;
358 }
359 
360 /**
361  * amdgpu_device_rreg - read a memory mapped IO or indirect register
362  *
363  * @adev: amdgpu_device pointer
364  * @reg: dword aligned register offset
365  * @acc_flags: access flags which require special behavior
366  *
367  * Returns the 32 bit value from the offset specified.
368  */
369 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
370 			    uint32_t reg, uint32_t acc_flags)
371 {
372 	uint32_t ret;
373 
374 	if (amdgpu_device_skip_hw_access(adev))
375 		return 0;
376 
377 	if ((reg * 4) < adev->rmmio_size) {
378 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
379 		    amdgpu_sriov_runtime(adev) &&
380 		    down_read_trylock(&adev->reset_sem)) {
381 			ret = amdgpu_kiq_rreg(adev, reg);
382 			up_read(&adev->reset_sem);
383 		} else {
384 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
385 		}
386 	} else {
387 		ret = adev->pcie_rreg(adev, reg * 4);
388 	}
389 
390 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
391 
392 	return ret;
393 }
394 
395 /*
396  * MMIO register read with bytes helper function
397  * @offset: byte offset from MMIO start
398  *
399  */
400 
401 /**
402  * amdgpu_mm_rreg8 - read a memory mapped IO register
403  *
404  * @adev: amdgpu_device pointer
405  * @offset: byte aligned register offset
406  *
407  * Returns the 8 bit value from the offset specified.
408  */
409 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
410 {
411 	if (amdgpu_device_skip_hw_access(adev))
412 		return 0;
413 
414 	if (offset < adev->rmmio_size)
415 		return (readb(adev->rmmio + offset));
416 	BUG();
417 }
418 
419 /*
420  * MMIO register write with bytes helper function
421  * @offset: byte offset from MMIO start
422  * @value: the value to be written to the register
423  *
424  */
425 /**
426  * amdgpu_mm_wreg8 - write to a memory mapped IO register
427  *
428  * @adev: amdgpu_device pointer
429  * @offset: byte aligned register offset
430  * @value: 8 bit value to write
431  *
432  * Writes the value specified to the offset specified.
433  */
434 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
435 {
436 	if (amdgpu_device_skip_hw_access(adev))
437 		return;
438 
439 	if (offset < adev->rmmio_size)
440 		writeb(value, adev->rmmio + offset);
441 	else
442 		BUG();
443 }
444 
445 /**
446  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
447  *
448  * @adev: amdgpu_device pointer
449  * @reg: dword aligned register offset
450  * @v: 32 bit value to write to the register
451  * @acc_flags: access flags which require special behavior
452  *
453  * Writes the value specified to the offset specified.
454  */
455 void amdgpu_device_wreg(struct amdgpu_device *adev,
456 			uint32_t reg, uint32_t v,
457 			uint32_t acc_flags)
458 {
459 	if (amdgpu_device_skip_hw_access(adev))
460 		return;
461 
462 	if ((reg * 4) < adev->rmmio_size) {
463 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
464 		    amdgpu_sriov_runtime(adev) &&
465 		    down_read_trylock(&adev->reset_sem)) {
466 			amdgpu_kiq_wreg(adev, reg, v);
467 			up_read(&adev->reset_sem);
468 		} else {
469 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
470 		}
471 	} else {
472 		adev->pcie_wreg(adev, reg * 4, v);
473 	}
474 
475 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
476 }
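
/*
 * Note: most code does not call amdgpu_device_rreg()/amdgpu_device_wreg()
 * directly but uses the RREG32()/WREG32() style macros, e.g. (illustrative
 * read-modify-write, reg and mask are placeholders):
 *
 *	u32 tmp = RREG32(reg);
 *
 *	tmp |= mask;
 *	WREG32(reg, tmp);
 */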
477 
478 /*
479  * amdgpu_mm_wreg_mmio_rlc - write a register either via MMIO or via the RLC path if in range
480  *
481  * This function is invoked only for debugfs register access.
482  */
483 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
484 			     uint32_t reg, uint32_t v)
485 {
486 	if (amdgpu_device_skip_hw_access(adev))
487 		return;
488 
489 	if (amdgpu_sriov_fullaccess(adev) &&
490 	    adev->gfx.rlc.funcs &&
491 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
492 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
493 			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
494 	} else {
495 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 	}
497 }
498 
499 /**
500  * amdgpu_mm_rdoorbell - read a doorbell dword
501  *
502  * @adev: amdgpu_device pointer
503  * @index: doorbell index
504  *
505  * Returns the value in the doorbell aperture at the
506  * requested doorbell index (CIK).
507  */
508 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
509 {
510 	if (amdgpu_device_skip_hw_access(adev))
511 		return 0;
512 
513 	if (index < adev->doorbell.num_doorbells) {
514 		return readl(adev->doorbell.ptr + index);
515 	} else {
516 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
517 		return 0;
518 	}
519 }
520 
521 /**
522  * amdgpu_mm_wdoorbell - write a doorbell dword
523  *
524  * @adev: amdgpu_device pointer
525  * @index: doorbell index
526  * @v: value to write
527  *
528  * Writes @v to the doorbell aperture at the
529  * requested doorbell index (CIK).
530  */
531 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
532 {
533 	if (amdgpu_device_skip_hw_access(adev))
534 		return;
535 
536 	if (index < adev->doorbell.num_doorbells) {
537 		writel(v, adev->doorbell.ptr + index);
538 	} else {
539 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
540 	}
541 }
542 
543 /**
544  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
545  *
546  * @adev: amdgpu_device pointer
547  * @index: doorbell index
548  *
549  * Returns the value in the doorbell aperture at the
550  * requested doorbell index (VEGA10+).
551  */
552 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
553 {
554 	if (amdgpu_device_skip_hw_access(adev))
555 		return 0;
556 
557 	if (index < adev->doorbell.num_doorbells) {
558 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
559 	} else {
560 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
561 		return 0;
562 	}
563 }
564 
565 /**
566  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
567  *
568  * @adev: amdgpu_device pointer
569  * @index: doorbell index
570  * @v: value to write
571  *
572  * Writes @v to the doorbell aperture at the
573  * requested doorbell index (VEGA10+).
574  */
575 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
576 {
577 	if (amdgpu_device_skip_hw_access(adev))
578 		return;
579 
580 	if (index < adev->doorbell.num_doorbells) {
581 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
582 	} else {
583 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
584 	}
585 }
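
/*
 * Note: callers normally go through the RDOORBELL32()/WDOORBELL32() and
 * RDOORBELL64()/WDOORBELL64() macros rather than calling these helpers
 * directly, e.g. (illustrative ring doorbell update):
 *
 *	WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
 */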
586 
587 /**
588  * amdgpu_device_indirect_rreg - read an indirect register
589  *
590  * @adev: amdgpu_device pointer
591  * @pcie_index: mmio register offset of the index register
592  * @pcie_data: mmio register offset of the data register
593  * @reg_addr: indirect register address to read from
594  *
595  * Returns the value of indirect register @reg_addr
596  */
597 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
598 				u32 pcie_index, u32 pcie_data,
599 				u32 reg_addr)
600 {
601 	unsigned long flags;
602 	u32 r;
603 	void __iomem *pcie_index_offset;
604 	void __iomem *pcie_data_offset;
605 
606 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
607 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
608 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
609 
610 	writel(reg_addr, pcie_index_offset);
611 	readl(pcie_index_offset);
612 	r = readl(pcie_data_offset);
613 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
614 
615 	return r;
616 }
617 
618 /**
619  * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
620  *
621  * @adev: amdgpu_device pointer
622  * @pcie_index: mmio register offset of the index register
623  * @pcie_data: mmio register offset of the data register
624  * @reg_addr: indirect register address to read from
625  *
626  * Returns the value of indirect register @reg_addr
627  */
628 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
629 				  u32 pcie_index, u32 pcie_data,
630 				  u32 reg_addr)
631 {
632 	unsigned long flags;
633 	u64 r;
634 	void __iomem *pcie_index_offset;
635 	void __iomem *pcie_data_offset;
636 
637 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
638 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
639 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
640 
641 	/* read low 32 bits */
642 	writel(reg_addr, pcie_index_offset);
643 	readl(pcie_index_offset);
644 	r = readl(pcie_data_offset);
645 	/* read high 32 bits */
646 	writel(reg_addr + 4, pcie_index_offset);
647 	readl(pcie_index_offset);
648 	r |= ((u64)readl(pcie_data_offset) << 32);
649 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
650 
651 	return r;
652 }
653 
654 /**
655  * amdgpu_device_indirect_wreg - write to an indirect register
656  *
657  * @adev: amdgpu_device pointer
658  * @pcie_index: mmio register offset of the index register
659  * @pcie_data: mmio register offset of the data register
660  * @reg_addr: indirect register offset
661  * @reg_data: indirect register data
662  *
663  */
664 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
665 				 u32 pcie_index, u32 pcie_data,
666 				 u32 reg_addr, u32 reg_data)
667 {
668 	unsigned long flags;
669 	void __iomem *pcie_index_offset;
670 	void __iomem *pcie_data_offset;
671 
672 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
673 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
674 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
675 
676 	writel(reg_addr, pcie_index_offset);
677 	readl(pcie_index_offset);
678 	writel(reg_data, pcie_data_offset);
679 	readl(pcie_data_offset);
680 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
681 }
682 
683 /**
684  * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
685  *
686  * @adev: amdgpu_device pointer
687  * @pcie_index: mmio register offset of the index register
688  * @pcie_data: mmio register offset of the data register
689  * @reg_addr: indirect register offset
690  * @reg_data: indirect register data
691  *
692  */
693 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
694 				   u32 pcie_index, u32 pcie_data,
695 				   u32 reg_addr, u64 reg_data)
696 {
697 	unsigned long flags;
698 	void __iomem *pcie_index_offset;
699 	void __iomem *pcie_data_offset;
700 
701 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
702 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
703 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
704 
705 	/* write low 32 bits */
706 	writel(reg_addr, pcie_index_offset);
707 	readl(pcie_index_offset);
708 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
709 	readl(pcie_data_offset);
710 	/* write high 32 bits */
711 	writel(reg_addr + 4, pcie_index_offset);
712 	readl(pcie_index_offset);
713 	writel((u32)(reg_data >> 32), pcie_data_offset);
714 	readl(pcie_data_offset);
715 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
716 }
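
/*
 * Usage sketch (illustrative; "soc_pcie_rreg" is a hypothetical name): an
 * ASIC's pcie_rreg/pcie_wreg callbacks are typically thin wrappers that feed
 * the NBIO-provided index/data register offsets into these helpers:
 *
 *	static u32 soc_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		u32 index = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		u32 data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg(adev, index, data, reg);
 *	}
 */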
717 
718 /**
719  * amdgpu_invalid_rreg - dummy reg read function
720  *
721  * @adev: amdgpu_device pointer
722  * @reg: offset of register
723  *
724  * Dummy register read function.  Used for register blocks
725  * that certain asics don't have (all asics).
726  * Returns the value in the register.
727  */
728 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
729 {
730 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
731 	BUG();
732 	return 0;
733 }
734 
735 /**
736  * amdgpu_invalid_wreg - dummy reg write function
737  *
738  * @adev: amdgpu_device pointer
739  * @reg: offset of register
740  * @v: value to write to the register
741  *
742  * Dummy register write function.  Used for register blocks
743  * that certain asics don't have (all asics).
744  */
745 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
746 {
747 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
748 		  reg, v);
749 	BUG();
750 }
751 
752 /**
753  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
754  *
755  * @adev: amdgpu_device pointer
756  * @reg: offset of register
757  *
758  * Dummy register read function.  Used for register blocks
759  * that certain asics don't have (all asics).
760  * Returns the value in the register.
761  */
762 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
763 {
764 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
765 	BUG();
766 	return 0;
767 }
768 
769 /**
770  * amdgpu_invalid_wreg64 - dummy reg write function
771  *
772  * @adev: amdgpu_device pointer
773  * @reg: offset of register
774  * @v: value to write to the register
775  *
776  * Dummy register write function.  Used for register blocks
777  * that certain asics don't have (all asics).
778  */
779 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
780 {
781 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
782 		  reg, v);
783 	BUG();
784 }
785 
786 /**
787  * amdgpu_block_invalid_rreg - dummy reg read function
788  *
789  * @adev: amdgpu_device pointer
790  * @block: offset of instance
791  * @reg: offset of register
792  *
793  * Dummy register read function.  Used for register blocks
794  * that certain asics don't have (all asics).
795  * Returns the value in the register.
796  */
797 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
798 					  uint32_t block, uint32_t reg)
799 {
800 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
801 		  reg, block);
802 	BUG();
803 	return 0;
804 }
805 
806 /**
807  * amdgpu_block_invalid_wreg - dummy reg write function
808  *
809  * @adev: amdgpu_device pointer
810  * @block: offset of instance
811  * @reg: offset of register
812  * @v: value to write to the register
813  *
814  * Dummy register write function.  Used for register blocks
815  * that certain asics don't have (all asics).
816  */
817 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
818 				      uint32_t block,
819 				      uint32_t reg, uint32_t v)
820 {
821 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
822 		  reg, block, v);
823 	BUG();
824 }
825 
826 /**
827  * amdgpu_device_asic_init - Wrapper for atom asic_init
828  *
829  * @adev: amdgpu_device pointer
830  *
831  * Does any asic specific work and then calls atom asic init.
832  */
833 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
834 {
835 	amdgpu_asic_pre_asic_init(adev);
836 
837 	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
838 }
839 
840 /**
841  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
842  *
843  * @adev: amdgpu_device pointer
844  *
845  * Allocates a scratch page of VRAM for use by various things in the
846  * driver.
847  */
848 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
849 {
850 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
851 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
852 				       &adev->vram_scratch.robj,
853 				       &adev->vram_scratch.gpu_addr,
854 				       (void **)&adev->vram_scratch.ptr);
855 }
856 
857 /**
858  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
859  *
860  * @adev: amdgpu_device pointer
861  *
862  * Frees the VRAM scratch page.
863  */
864 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
865 {
866 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
867 }
868 
869 /**
870  * amdgpu_device_program_register_sequence - program an array of registers.
871  *
872  * @adev: amdgpu_device pointer
873  * @registers: pointer to the register array
874  * @array_size: size of the register array
875  *
876  * Programs an array of registers with AND and OR masks.
877  * This is a helper for setting golden registers.
878  */
879 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
880 					     const u32 *registers,
881 					     const u32 array_size)
882 {
883 	u32 tmp, reg, and_mask, or_mask;
884 	int i;
885 
886 	if (array_size % 3)
887 		return;
888 
889 	for (i = 0; i < array_size; i += 3) {
890 		reg = registers[i + 0];
891 		and_mask = registers[i + 1];
892 		or_mask = registers[i + 2];
893 
894 		if (and_mask == 0xffffffff) {
895 			tmp = or_mask;
896 		} else {
897 			tmp = RREG32(reg);
898 			tmp &= ~and_mask;
899 			if (adev->family >= AMDGPU_FAMILY_AI)
900 				tmp |= (or_mask & and_mask);
901 			else
902 				tmp |= or_mask;
903 		}
904 		WREG32(reg, tmp);
905 	}
906 }
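
/*
 * Usage sketch (illustrative; "mmSOME_REG" and the masks are made-up values):
 * golden register arrays are laid out as {offset, AND mask, OR value} triples:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmSOME_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */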
907 
908 /**
909  * amdgpu_device_pci_config_reset - reset the GPU
910  *
911  * @adev: amdgpu_device pointer
912  *
913  * Resets the GPU using the pci config reset sequence.
914  * Only applicable to asics prior to vega10.
915  */
916 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
917 {
918 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
919 }
920 
921 /**
922  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
923  *
924  * @adev: amdgpu_device pointer
925  *
926  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
927  */
928 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
929 {
930 	return pci_reset_function(adev->pdev);
931 }
932 
933 /*
934  * GPU doorbell aperture helpers function.
935  */
936 /**
937  * amdgpu_device_doorbell_init - Init doorbell driver information.
938  *
939  * @adev: amdgpu_device pointer
940  *
941  * Init doorbell driver information (CIK)
942  * Returns 0 on success, error on failure.
943  */
944 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
945 {
946 
947 	/* No doorbell on SI hardware generation */
948 	if (adev->asic_type < CHIP_BONAIRE) {
949 		adev->doorbell.base = 0;
950 		adev->doorbell.size = 0;
951 		adev->doorbell.num_doorbells = 0;
952 		adev->doorbell.ptr = NULL;
953 		return 0;
954 	}
955 
956 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
957 		return -EINVAL;
958 
959 	amdgpu_asic_init_doorbell_index(adev);
960 
961 	/* doorbell bar mapping */
962 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
963 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
964 
965 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
966 					     adev->doorbell_index.max_assignment+1);
967 	if (adev->doorbell.num_doorbells == 0)
968 		return -EINVAL;
969 
970 	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
971 	 * paging queue doorbells use the second page. The
972 	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
973 	 * doorbells are in the first page. So with paging queue enabled,
974 	 * the max num_doorbells should be increased by one page (0x400 in dwords).
975 	 */
976 	if (adev->asic_type >= CHIP_VEGA10)
977 		adev->doorbell.num_doorbells += 0x400;
978 
979 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
980 				     adev->doorbell.num_doorbells *
981 				     sizeof(u32));
982 	if (adev->doorbell.ptr == NULL)
983 		return -ENOMEM;
984 
985 	return 0;
986 }
987 
988 /**
989  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
990  *
991  * @adev: amdgpu_device pointer
992  *
993  * Tear down doorbell driver information (CIK)
994  */
995 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
996 {
997 	iounmap(adev->doorbell.ptr);
998 	adev->doorbell.ptr = NULL;
999 }
1000 
1001 
1002 
1003 /*
1004  * amdgpu_device_wb_*()
1005  * Writeback is the method by which the GPU updates special pages in memory
1006  * with the status of certain GPU events (fences, ring pointers, etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or a negative error code on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1073 
1074 	if (offset < adev->wb.num_wb) {
1075 		__set_bit(offset, adev->wb.used);
1076 		*wb = offset << 3; /* convert to dw offset */
1077 		return 0;
1078 	} else {
1079 		return -EINVAL;
1080 	}
1081 }
1082 
1083 /**
1084  * amdgpu_device_wb_free - Free a wb entry
1085  *
1086  * @adev: amdgpu_device pointer
1087  * @wb: wb index
1088  *
1089  * Free a wb slot allocated for use by the driver (all asics)
1090  */
1091 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1092 {
1093 	wb >>= 3;
1094 	if (wb < adev->wb.num_wb)
1095 		__clear_bit(wb, adev->wb.used);
1096 }
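
/*
 * Usage sketch (illustrative): rings and IP blocks typically allocate a
 * writeback slot at init time and release it on teardown.  The returned
 * index is in dwords, so the CPU view is adev->wb.wb[wb] and the GPU
 * address is adev->wb.gpu_addr + wb * 4:
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		... use the slot ...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */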
1097 
1098 /**
1099  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1100  *
1101  * @adev: amdgpu_device pointer
1102  *
1103  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1104  * to fail, but if any of the BARs is not accessible after the resize we abort
1105  * driver loading by returning -ENODEV.
1106  */
1107 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1108 {
1109 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1110 	struct pci_bus *root;
1111 	struct resource *res;
1112 	unsigned i;
1113 	u16 cmd;
1114 	int r;
1115 
1116 	/* Bypass for VF */
1117 	if (amdgpu_sriov_vf(adev))
1118 		return 0;
1119 
1120 	/* skip if the bios has already enabled large BAR */
1121 	if (adev->gmc.real_vram_size &&
1122 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1123 		return 0;
1124 
1125 	/* Check if the root BUS has 64bit memory resources */
1126 	root = adev->pdev->bus;
1127 	while (root->parent)
1128 		root = root->parent;
1129 
1130 	pci_bus_for_each_resource(root, res, i) {
1131 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1132 		    res->start > 0x100000000ull)
1133 			break;
1134 	}
1135 
1136 	/* Trying to resize is pointless without a root hub window above 4GB */
1137 	if (!res)
1138 		return 0;
1139 
1140 	/* Limit the BAR size to what is available */
1141 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1142 			rbar_size);
1143 
1144 	/* Disable memory decoding while we change the BAR addresses and size */
1145 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1146 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1147 			      cmd & ~PCI_COMMAND_MEMORY);
1148 
1149 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1150 	amdgpu_device_doorbell_fini(adev);
1151 	if (adev->asic_type >= CHIP_BONAIRE)
1152 		pci_release_resource(adev->pdev, 2);
1153 
1154 	pci_release_resource(adev->pdev, 0);
1155 
1156 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1157 	if (r == -ENOSPC)
1158 		DRM_INFO("Not enough PCI address space for a large BAR.");
1159 	else if (r && r != -ENOTSUPP)
1160 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1161 
1162 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1163 
1164 	/* When the doorbell or fb BAR isn't available we have no chance of
1165 	 * using the device.
1166 	 */
1167 	r = amdgpu_device_doorbell_init(adev);
1168 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1169 		return -ENODEV;
1170 
1171 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1172 
1173 	return 0;
1174 }
1175 
1176 /*
1177  * GPU helpers function.
1178  */
1179 /**
1180  * amdgpu_device_need_post - check if the hw needs to be posted or not
1181  *
1182  * @adev: amdgpu_device pointer
1183  *
1184  * Check if the asic has been initialized (all asics) at driver startup
1185  * or if a post is needed because a hw reset was performed.
1186  * Returns true if a post is needed, false if not.
1187  */
1188 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1189 {
1190 	uint32_t reg;
1191 
1192 	if (amdgpu_sriov_vf(adev))
1193 		return false;
1194 
1195 	if (amdgpu_passthrough(adev)) {
1196 		/* for FIJI: In the whole GPU pass-through virtualization case, after a VM reboot
1197 		 * some old smc fw versions still need the driver to do a vPost, otherwise the gpu hangs.
1198 		 * smc fw versions above 22.15 don't have this flaw, so we force
1199 		 * vPost to be executed for smc versions below 22.15.
1200 		 */
1201 		if (adev->asic_type == CHIP_FIJI) {
1202 			int err;
1203 			uint32_t fw_ver;
1204 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1205 			/* force vPost if an error occurred */
1206 			if (err)
1207 				return true;
1208 
1209 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1210 			if (fw_ver < 0x00160e00)
1211 				return true;
1212 		}
1213 	}
1214 
1215 	/* Don't post if we need to reset whole hive on init */
1216 	if (adev->gmc.xgmi.pending_reset)
1217 		return false;
1218 
1219 	if (adev->has_hw_reset) {
1220 		adev->has_hw_reset = false;
1221 		return true;
1222 	}
1223 
1224 	/* bios scratch used on CIK+ */
1225 	if (adev->asic_type >= CHIP_BONAIRE)
1226 		return amdgpu_atombios_scratch_need_asic_init(adev);
1227 
1228 	/* check MEM_SIZE for older asics */
1229 	reg = amdgpu_asic_get_config_memsize(adev);
1230 
1231 	if ((reg != 0) && (reg != 0xffffffff))
1232 		return false;
1233 
1234 	return true;
1235 }
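
/*
 * Usage sketch (illustrative): during early device init this check is
 * roughly paired with the atom asic_init wrapper defined above:
 *
 *	if (amdgpu_device_need_post(adev)) {
 *		r = amdgpu_device_asic_init(adev);
 *		if (r)
 *			dev_err(adev->dev, "asic init failed\n");
 *	}
 */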
1236 
1237 /* if we get transitioned to only one device, take VGA back */
1238 /**
1239  * amdgpu_device_vga_set_decode - enable/disable vga decode
1240  *
1241  * @cookie: amdgpu_device pointer
1242  * @state: enable/disable vga decode
1243  *
1244  * Enable/disable vga decode (all asics).
1245  * Returns VGA resource flags.
1246  */
1247 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1248 {
1249 	struct amdgpu_device *adev = cookie;
1250 	amdgpu_asic_set_vga_state(adev, state);
1251 	if (state)
1252 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1253 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1254 	else
1255 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1256 }
1257 
1258 /**
1259  * amdgpu_device_check_block_size - validate the vm block size
1260  *
1261  * @adev: amdgpu_device pointer
1262  *
1263  * Validates the vm block size specified via module parameter.
1264  * The vm block size defines the number of bits in the page table versus the page directory.
1265  * A page is 4KB, so we have 12 bits of offset; at least 9 bits go to the
1266  * page table and the remaining bits to the page directory.
1267  */
1268 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1269 {
1270 	/* defines number of bits in page table versus page directory,
1271 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1272 	 * page table and the remaining bits are in the page directory */
1273 	if (amdgpu_vm_block_size == -1)
1274 		return;
1275 
1276 	if (amdgpu_vm_block_size < 9) {
1277 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1278 			 amdgpu_vm_block_size);
1279 		amdgpu_vm_block_size = -1;
1280 	}
1281 }
1282 
1283 /**
1284  * amdgpu_device_check_vm_size - validate the vm size
1285  *
1286  * @adev: amdgpu_device pointer
1287  *
1288  * Validates the vm size in GB specified via module parameter.
1289  * The VM size is the size of the GPU virtual memory space in GB.
1290  */
1291 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1292 {
1293 	/* no need to check the default value */
1294 	if (amdgpu_vm_size == -1)
1295 		return;
1296 
1297 	if (amdgpu_vm_size < 1) {
1298 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1299 			 amdgpu_vm_size);
1300 		amdgpu_vm_size = -1;
1301 	}
1302 }
1303 
1304 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1305 {
1306 	struct sysinfo si;
1307 	bool is_os_64 = (sizeof(void *) == 8);
1308 	uint64_t total_memory;
1309 	uint64_t dram_size_seven_GB = 0x1B8000000;
1310 	uint64_t dram_size_three_GB = 0xB8000000;
1311 
1312 	if (amdgpu_smu_memory_pool_size == 0)
1313 		return;
1314 
1315 	if (!is_os_64) {
1316 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1317 		goto def_value;
1318 	}
1319 	si_meminfo(&si);
1320 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1321 
1322 	if ((amdgpu_smu_memory_pool_size == 1) ||
1323 		(amdgpu_smu_memory_pool_size == 2)) {
1324 		if (total_memory < dram_size_three_GB)
1325 			goto def_value1;
1326 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1327 		(amdgpu_smu_memory_pool_size == 8)) {
1328 		if (total_memory < dram_size_seven_GB)
1329 			goto def_value1;
1330 	} else {
1331 		DRM_WARN("Smu memory pool size not supported\n");
1332 		goto def_value;
1333 	}
1334 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1335 
1336 	return;
1337 
1338 def_value1:
1339 	DRM_WARN("Not enough system memory\n");
1340 def_value:
1341 	adev->pm.smu_prv_buffer_size = 0;
1342 }
1343 
1344 /**
1345  * amdgpu_device_check_arguments - validate module params
1346  *
1347  * @adev: amdgpu_device pointer
1348  *
1349  * Validates certain module parameters and updates
1350  * the associated values used by the driver (all asics).
1351  */
1352 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1353 {
1354 	if (amdgpu_sched_jobs < 4) {
1355 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1356 			 amdgpu_sched_jobs);
1357 		amdgpu_sched_jobs = 4;
1358 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1359 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1360 			 amdgpu_sched_jobs);
1361 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1362 	}
1363 
1364 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1365 		/* gart size must be greater than or equal to 32M */
1366 		dev_warn(adev->dev, "gart size (%d) too small\n",
1367 			 amdgpu_gart_size);
1368 		amdgpu_gart_size = -1;
1369 	}
1370 
1371 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1372 		/* gtt size must be greater than or equal to 32M */
1373 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1374 				 amdgpu_gtt_size);
1375 		amdgpu_gtt_size = -1;
1376 	}
1377 
1378 	/* valid range is between 4 and 9 inclusive */
1379 	if (amdgpu_vm_fragment_size != -1 &&
1380 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1381 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1382 		amdgpu_vm_fragment_size = -1;
1383 	}
1384 
1385 	if (amdgpu_sched_hw_submission < 2) {
1386 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1387 			 amdgpu_sched_hw_submission);
1388 		amdgpu_sched_hw_submission = 2;
1389 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1390 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1391 			 amdgpu_sched_hw_submission);
1392 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1393 	}
1394 
1395 	amdgpu_device_check_smu_prv_buffer_size(adev);
1396 
1397 	amdgpu_device_check_vm_size(adev);
1398 
1399 	amdgpu_device_check_block_size(adev);
1400 
1401 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1402 
1403 	amdgpu_gmc_tmz_set(adev);
1404 
1405 	amdgpu_gmc_noretry_set(adev);
1406 
1407 	return 0;
1408 }
1409 
1410 /**
1411  * amdgpu_switcheroo_set_state - set switcheroo state
1412  *
1413  * @pdev: pci dev pointer
1414  * @state: vga_switcheroo state
1415  *
1416  * Callback for the switcheroo driver.  Suspends or resumes
1417  * the asic before or after it is powered up using ACPI methods.
1418  */
1419 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1420 					enum vga_switcheroo_state state)
1421 {
1422 	struct drm_device *dev = pci_get_drvdata(pdev);
1423 	int r;
1424 
1425 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1426 		return;
1427 
1428 	if (state == VGA_SWITCHEROO_ON) {
1429 		pr_info("switched on\n");
1430 		/* don't suspend or resume card normally */
1431 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1432 
1433 		pci_set_power_state(pdev, PCI_D0);
1434 		amdgpu_device_load_pci_state(pdev);
1435 		r = pci_enable_device(pdev);
1436 		if (r)
1437 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1438 		amdgpu_device_resume(dev, true);
1439 
1440 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1441 	} else {
1442 		pr_info("switched off\n");
1443 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1444 		amdgpu_device_suspend(dev, true);
1445 		amdgpu_device_cache_pci_state(pdev);
1446 		/* Shut down the device */
1447 		pci_disable_device(pdev);
1448 		pci_set_power_state(pdev, PCI_D3cold);
1449 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1450 	}
1451 }
1452 
1453 /**
1454  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1455  *
1456  * @pdev: pci dev pointer
1457  *
1458  * Callback for the switcheroo driver.  Checks if the switcheroo
1459  * state can be changed.
1460  * Returns true if the state can be changed, false if not.
1461  */
1462 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1463 {
1464 	struct drm_device *dev = pci_get_drvdata(pdev);
1465 
1466 	/*
1467 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1468 	* locking inversion with the driver load path. And the access here is
1469 	* completely racy anyway. So don't bother with locking for now.
1470 	*/
1471 	return atomic_read(&dev->open_count) == 0;
1472 }
1473 
1474 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1475 	.set_gpu_state = amdgpu_switcheroo_set_state,
1476 	.reprobe = NULL,
1477 	.can_switch = amdgpu_switcheroo_can_switch,
1478 };
1479 
1480 /**
1481  * amdgpu_device_ip_set_clockgating_state - set the CG state
1482  *
1483  * @dev: amdgpu_device pointer
1484  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1485  * @state: clockgating state (gate or ungate)
1486  *
1487  * Sets the requested clockgating state for all instances of
1488  * the hardware IP specified.
1489  * Returns the error code from the last instance.
1490  */
1491 int amdgpu_device_ip_set_clockgating_state(void *dev,
1492 					   enum amd_ip_block_type block_type,
1493 					   enum amd_clockgating_state state)
1494 {
1495 	struct amdgpu_device *adev = dev;
1496 	int i, r = 0;
1497 
1498 	for (i = 0; i < adev->num_ip_blocks; i++) {
1499 		if (!adev->ip_blocks[i].status.valid)
1500 			continue;
1501 		if (adev->ip_blocks[i].version->type != block_type)
1502 			continue;
1503 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1504 			continue;
1505 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1506 			(void *)adev, state);
1507 		if (r)
1508 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1509 				  adev->ip_blocks[i].version->funcs->name, r);
1510 	}
1511 	return r;
1512 }
1513 
1514 /**
1515  * amdgpu_device_ip_set_powergating_state - set the PG state
1516  *
1517  * @dev: amdgpu_device pointer
1518  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1519  * @state: powergating state (gate or ungate)
1520  *
1521  * Sets the requested powergating state for all instances of
1522  * the hardware IP specified.
1523  * Returns the error code from the last instance.
1524  */
1525 int amdgpu_device_ip_set_powergating_state(void *dev,
1526 					   enum amd_ip_block_type block_type,
1527 					   enum amd_powergating_state state)
1528 {
1529 	struct amdgpu_device *adev = dev;
1530 	int i, r = 0;
1531 
1532 	for (i = 0; i < adev->num_ip_blocks; i++) {
1533 		if (!adev->ip_blocks[i].status.valid)
1534 			continue;
1535 		if (adev->ip_blocks[i].version->type != block_type)
1536 			continue;
1537 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1538 			continue;
1539 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1540 			(void *)adev, state);
1541 		if (r)
1542 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1543 				  adev->ip_blocks[i].version->funcs->name, r);
1544 	}
1545 	return r;
1546 }
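
/*
 * Usage sketch (illustrative, block type chosen arbitrarily): power management
 * code gates clocks and power for an IP block like this:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_PG_STATE_GATE);
 */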
1547 
1548 /**
1549  * amdgpu_device_ip_get_clockgating_state - get the CG state
1550  *
1551  * @adev: amdgpu_device pointer
1552  * @flags: clockgating feature flags
1553  *
1554  * Walks the list of IPs on the device and updates the clockgating
1555  * flags for each IP.
1556  * Updates @flags with the feature flags for each hardware IP where
1557  * clockgating is enabled.
1558  */
1559 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1560 					    u32 *flags)
1561 {
1562 	int i;
1563 
1564 	for (i = 0; i < adev->num_ip_blocks; i++) {
1565 		if (!adev->ip_blocks[i].status.valid)
1566 			continue;
1567 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1568 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1569 	}
1570 }
1571 
1572 /**
1573  * amdgpu_device_ip_wait_for_idle - wait for idle
1574  *
1575  * @adev: amdgpu_device pointer
1576  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1577  *
1578  * Waits for the requested hardware IP to be idle.
1579  * Returns 0 for success or a negative error code on failure.
1580  */
1581 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1582 				   enum amd_ip_block_type block_type)
1583 {
1584 	int i, r;
1585 
1586 	for (i = 0; i < adev->num_ip_blocks; i++) {
1587 		if (!adev->ip_blocks[i].status.valid)
1588 			continue;
1589 		if (adev->ip_blocks[i].version->type == block_type) {
1590 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1591 			if (r)
1592 				return r;
1593 			break;
1594 		}
1595 	}
1596 	return 0;
1597 
1598 }
1599 
1600 /**
1601  * amdgpu_device_ip_is_idle - is the hardware IP idle
1602  *
1603  * @adev: amdgpu_device pointer
1604  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1605  *
1606  * Check if the hardware IP is idle or not.
1607  * Returns true if the IP is idle, false if not.
1608  */
1609 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1610 			      enum amd_ip_block_type block_type)
1611 {
1612 	int i;
1613 
1614 	for (i = 0; i < adev->num_ip_blocks; i++) {
1615 		if (!adev->ip_blocks[i].status.valid)
1616 			continue;
1617 		if (adev->ip_blocks[i].version->type == block_type)
1618 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1619 	}
1620 	return true;
1621 
1622 }
1623 
1624 /**
1625  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1626  *
1627  * @adev: amdgpu_device pointer
1628  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1629  *
1630  * Returns a pointer to the hardware IP block structure
1631  * if it exists for the asic, otherwise NULL.
1632  */
1633 struct amdgpu_ip_block *
1634 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1635 			      enum amd_ip_block_type type)
1636 {
1637 	int i;
1638 
1639 	for (i = 0; i < adev->num_ip_blocks; i++)
1640 		if (adev->ip_blocks[i].version->type == type)
1641 			return &adev->ip_blocks[i];
1642 
1643 	return NULL;
1644 }
1645 
1646 /**
1647  * amdgpu_device_ip_block_version_cmp
1648  *
1649  * @adev: amdgpu_device pointer
1650  * @type: enum amd_ip_block_type
1651  * @major: major version
1652  * @minor: minor version
1653  *
1654  * Returns 0 if the IP block version is equal to or greater than the given version,
1655  * or 1 if it is smaller or the ip_block doesn't exist.
1656  */
1657 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1658 				       enum amd_ip_block_type type,
1659 				       u32 major, u32 minor)
1660 {
1661 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1662 
1663 	if (ip_block && ((ip_block->version->major > major) ||
1664 			((ip_block->version->major == major) &&
1665 			(ip_block->version->minor >= minor))))
1666 		return 0;
1667 
1668 	return 1;
1669 }
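
/*
 * Usage sketch (illustrative, block type and version chosen arbitrarily):
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 0) == 0) {
 *		... the GFX IP block is version 8.0 or newer ...
 *	}
 */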
1670 
1671 /**
1672  * amdgpu_device_ip_block_add
1673  *
1674  * @adev: amdgpu_device pointer
1675  * @ip_block_version: pointer to the IP to add
1676  *
1677  * Adds the IP block driver information to the collection of IPs
1678  * on the asic.
1679  */
1680 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1681 			       const struct amdgpu_ip_block_version *ip_block_version)
1682 {
1683 	if (!ip_block_version)
1684 		return -EINVAL;
1685 
1686 	switch (ip_block_version->type) {
1687 	case AMD_IP_BLOCK_TYPE_VCN:
1688 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1689 			return 0;
1690 		break;
1691 	case AMD_IP_BLOCK_TYPE_JPEG:
1692 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1693 			return 0;
1694 		break;
1695 	default:
1696 		break;
1697 	}
1698 
1699 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1700 		  ip_block_version->funcs->name);
1701 
1702 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1703 
1704 	return 0;
1705 }
1706 
1707 /**
1708  * amdgpu_device_enable_virtual_display - enable virtual display feature
1709  *
1710  * @adev: amdgpu_device pointer
1711  *
1712  * Enables the virtual display feature if the user has enabled it via
1713  * the module parameter virtual_display.  This feature provides virtual
1714  * display hardware on headless boards or in virtualized environments.
1715  * This function parses and validates the configuration string specified by
1716  * the user and configures the virtual display configuration (number of
1717  * virtual connectors, crtcs, etc.) specified.
1718  */
1719 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1720 {
1721 	adev->enable_virtual_display = false;
1722 
1723 	if (amdgpu_virtual_display) {
1724 		const char *pci_address_name = pci_name(adev->pdev);
1725 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1726 
1727 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1728 		pciaddstr_tmp = pciaddstr;
1729 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1730 			pciaddname = strsep(&pciaddname_tmp, ",");
1731 			if (!strcmp("all", pciaddname)
1732 			    || !strcmp(pci_address_name, pciaddname)) {
1733 				long num_crtc;
1734 				int res = -1;
1735 
1736 				adev->enable_virtual_display = true;
1737 
1738 				if (pciaddname_tmp)
1739 					res = kstrtol(pciaddname_tmp, 10,
1740 						      &num_crtc);
1741 
1742 				if (!res) {
1743 					if (num_crtc < 1)
1744 						num_crtc = 1;
1745 					if (num_crtc > 6)
1746 						num_crtc = 6;
1747 					adev->mode_info.num_crtc = num_crtc;
1748 				} else {
1749 					adev->mode_info.num_crtc = 1;
1750 				}
1751 				break;
1752 			}
1753 		}
1754 
1755 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1756 			 amdgpu_virtual_display, pci_address_name,
1757 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1758 
1759 		kfree(pciaddstr);
1760 	}
1761 }
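
/*
 * Example (illustrative PCI address): the virtual_display parameter is a
 * ';'-separated list of "pci_address,num_crtc" entries, or "all" to match
 * every device, e.g. on the kernel command line:
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 */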
1762 
1763 /**
1764  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1765  *
1766  * @adev: amdgpu_device pointer
1767  *
1768  * Parses the asic configuration parameters specified in the gpu info
1769  * firmware and makes them available to the driver for use in configuring
1770  * the asic.
1771  * Returns 0 on success, -EINVAL on failure.
1772  */
1773 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1774 {
1775 	const char *chip_name;
1776 	char fw_name[40];
1777 	int err;
1778 	const struct gpu_info_firmware_header_v1_0 *hdr;
1779 
1780 	adev->firmware.gpu_info_fw = NULL;
1781 
1782 	if (adev->mman.discovery_bin) {
1783 		amdgpu_discovery_get_gfx_info(adev);
1784 
1785 		/*
1786 		 * FIXME: The bounding box is still needed by Navi12, so
1787 		 * temporarily read it from gpu_info firmware. Should be dropped
1788 		 * when DAL no longer needs it.
1789 		 */
1790 		if (adev->asic_type != CHIP_NAVI12)
1791 			return 0;
1792 	}
1793 
1794 	switch (adev->asic_type) {
1795 #ifdef CONFIG_DRM_AMDGPU_SI
1796 	case CHIP_VERDE:
1797 	case CHIP_TAHITI:
1798 	case CHIP_PITCAIRN:
1799 	case CHIP_OLAND:
1800 	case CHIP_HAINAN:
1801 #endif
1802 #ifdef CONFIG_DRM_AMDGPU_CIK
1803 	case CHIP_BONAIRE:
1804 	case CHIP_HAWAII:
1805 	case CHIP_KAVERI:
1806 	case CHIP_KABINI:
1807 	case CHIP_MULLINS:
1808 #endif
1809 	case CHIP_TOPAZ:
1810 	case CHIP_TONGA:
1811 	case CHIP_FIJI:
1812 	case CHIP_POLARIS10:
1813 	case CHIP_POLARIS11:
1814 	case CHIP_POLARIS12:
1815 	case CHIP_VEGAM:
1816 	case CHIP_CARRIZO:
1817 	case CHIP_STONEY:
1818 	case CHIP_VEGA20:
1819 	case CHIP_ALDEBARAN:
1820 	case CHIP_SIENNA_CICHLID:
1821 	case CHIP_NAVY_FLOUNDER:
1822 	case CHIP_DIMGREY_CAVEFISH:
1823 	default:
1824 		return 0;
1825 	case CHIP_VEGA10:
1826 		chip_name = "vega10";
1827 		break;
1828 	case CHIP_VEGA12:
1829 		chip_name = "vega12";
1830 		break;
1831 	case CHIP_RAVEN:
1832 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1833 			chip_name = "raven2";
1834 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1835 			chip_name = "picasso";
1836 		else
1837 			chip_name = "raven";
1838 		break;
1839 	case CHIP_ARCTURUS:
1840 		chip_name = "arcturus";
1841 		break;
1842 	case CHIP_RENOIR:
1843 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1844 			chip_name = "renoir";
1845 		else
1846 			chip_name = "green_sardine";
1847 		break;
1848 	case CHIP_NAVI10:
1849 		chip_name = "navi10";
1850 		break;
1851 	case CHIP_NAVI14:
1852 		chip_name = "navi14";
1853 		break;
1854 	case CHIP_NAVI12:
1855 		chip_name = "navi12";
1856 		break;
1857 	case CHIP_VANGOGH:
1858 		chip_name = "vangogh";
1859 		break;
1860 	}
1861 
1862 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1863 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1864 	if (err) {
1865 		dev_err(adev->dev,
1866 			"Failed to load gpu_info firmware \"%s\"\n",
1867 			fw_name);
1868 		goto out;
1869 	}
1870 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1871 	if (err) {
1872 		dev_err(adev->dev,
1873 			"Failed to validate gpu_info firmware \"%s\"\n",
1874 			fw_name);
1875 		goto out;
1876 	}
1877 
1878 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1879 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1880 
1881 	switch (hdr->version_major) {
1882 	case 1:
1883 	{
1884 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1885 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1886 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1887 
1888 		/*
1889 		 * Should be dropped when DAL no longer needs it.
1890 		 */
1891 		if (adev->asic_type == CHIP_NAVI12)
1892 			goto parse_soc_bounding_box;
1893 
1894 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1895 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1896 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1897 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1898 		adev->gfx.config.max_texture_channel_caches =
1899 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1900 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1901 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1902 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1903 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1904 		adev->gfx.config.double_offchip_lds_buf =
1905 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1906 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1907 		adev->gfx.cu_info.max_waves_per_simd =
1908 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1909 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1910 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1911 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1912 		if (hdr->version_minor >= 1) {
1913 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1914 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1915 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1916 			adev->gfx.config.num_sc_per_sh =
1917 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1918 			adev->gfx.config.num_packer_per_sc =
1919 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1920 		}
1921 
1922 parse_soc_bounding_box:
1923 		/*
1924 		 * SOC bounding box info is not integrated into the discovery table,
1925 		 * so we always need to parse it from the gpu info firmware when needed.
1926 		 */
1927 		if (hdr->version_minor == 2) {
1928 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1929 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1930 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1931 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1932 		}
1933 		break;
1934 	}
1935 	default:
1936 		dev_err(adev->dev,
1937 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1938 		err = -EINVAL;
1939 		goto out;
1940 	}
1941 out:
1942 	return err;
1943 }
1944 
1945 /**
1946  * amdgpu_device_ip_early_init - run early init for hardware IPs
1947  *
1948  * @adev: amdgpu_device pointer
1949  *
1950  * Early initialization pass for hardware IPs.  The hardware IPs that make
1951  * up each asic are discovered and each IP's early_init callback is run.  This
1952  * is the first stage in initializing the asic.
1953  * Returns 0 on success, negative error code on failure.
1954  */
1955 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1956 {
1957 	int i, r;
1958 
1959 	amdgpu_device_enable_virtual_display(adev);
1960 
1961 	if (amdgpu_sriov_vf(adev)) {
1962 		r = amdgpu_virt_request_full_gpu(adev, true);
1963 		if (r)
1964 			return r;
1965 	}
1966 
1967 	switch (adev->asic_type) {
1968 #ifdef CONFIG_DRM_AMDGPU_SI
1969 	case CHIP_VERDE:
1970 	case CHIP_TAHITI:
1971 	case CHIP_PITCAIRN:
1972 	case CHIP_OLAND:
1973 	case CHIP_HAINAN:
1974 		adev->family = AMDGPU_FAMILY_SI;
1975 		r = si_set_ip_blocks(adev);
1976 		if (r)
1977 			return r;
1978 		break;
1979 #endif
1980 #ifdef CONFIG_DRM_AMDGPU_CIK
1981 	case CHIP_BONAIRE:
1982 	case CHIP_HAWAII:
1983 	case CHIP_KAVERI:
1984 	case CHIP_KABINI:
1985 	case CHIP_MULLINS:
1986 		if (adev->flags & AMD_IS_APU)
1987 			adev->family = AMDGPU_FAMILY_KV;
1988 		else
1989 			adev->family = AMDGPU_FAMILY_CI;
1990 
1991 		r = cik_set_ip_blocks(adev);
1992 		if (r)
1993 			return r;
1994 		break;
1995 #endif
1996 	case CHIP_TOPAZ:
1997 	case CHIP_TONGA:
1998 	case CHIP_FIJI:
1999 	case CHIP_POLARIS10:
2000 	case CHIP_POLARIS11:
2001 	case CHIP_POLARIS12:
2002 	case CHIP_VEGAM:
2003 	case CHIP_CARRIZO:
2004 	case CHIP_STONEY:
2005 		if (adev->flags & AMD_IS_APU)
2006 			adev->family = AMDGPU_FAMILY_CZ;
2007 		else
2008 			adev->family = AMDGPU_FAMILY_VI;
2009 
2010 		r = vi_set_ip_blocks(adev);
2011 		if (r)
2012 			return r;
2013 		break;
2014 	case CHIP_VEGA10:
2015 	case CHIP_VEGA12:
2016 	case CHIP_VEGA20:
2017 	case CHIP_RAVEN:
2018 	case CHIP_ARCTURUS:
2019 	case CHIP_RENOIR:
2020 	case CHIP_ALDEBARAN:
2021 		if (adev->flags & AMD_IS_APU)
2022 			adev->family = AMDGPU_FAMILY_RV;
2023 		else
2024 			adev->family = AMDGPU_FAMILY_AI;
2025 
2026 		r = soc15_set_ip_blocks(adev);
2027 		if (r)
2028 			return r;
2029 		break;
2030 	case  CHIP_NAVI10:
2031 	case  CHIP_NAVI14:
2032 	case  CHIP_NAVI12:
2033 	case  CHIP_SIENNA_CICHLID:
2034 	case  CHIP_NAVY_FLOUNDER:
2035 	case  CHIP_DIMGREY_CAVEFISH:
2036 	case CHIP_VANGOGH:
2037 		if (adev->asic_type == CHIP_VANGOGH)
2038 			adev->family = AMDGPU_FAMILY_VGH;
2039 		else
2040 			adev->family = AMDGPU_FAMILY_NV;
2041 
2042 		r = nv_set_ip_blocks(adev);
2043 		if (r)
2044 			return r;
2045 		break;
2046 	default:
2047 		/* FIXME: not supported yet */
2048 		return -EINVAL;
2049 	}
2050 
2051 	amdgpu_amdkfd_device_probe(adev);
2052 
2053 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2054 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2055 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2056 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2057 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2058 
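	/*
	 * Walk the discovered IP blocks.  amdgpu.ip_block_mask is a bitmask in
	 * which bit i enables IP block i; a cleared bit marks the block invalid
	 * so it is skipped for the rest of init.  For example (illustrative
	 * only, since the block order depends on the ASIC),
	 * ip_block_mask=0xfffffffd would disable block 1.
	 */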
2059 	for (i = 0; i < adev->num_ip_blocks; i++) {
2060 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2061 			DRM_ERROR("disabled ip block: %d <%s>\n",
2062 				  i, adev->ip_blocks[i].version->funcs->name);
2063 			adev->ip_blocks[i].status.valid = false;
2064 		} else {
2065 			if (adev->ip_blocks[i].version->funcs->early_init) {
2066 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2067 				if (r == -ENOENT) {
2068 					adev->ip_blocks[i].status.valid = false;
2069 				} else if (r) {
2070 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2071 						  adev->ip_blocks[i].version->funcs->name, r);
2072 					return r;
2073 				} else {
2074 					adev->ip_blocks[i].status.valid = true;
2075 				}
2076 			} else {
2077 				adev->ip_blocks[i].status.valid = true;
2078 			}
2079 		}
2080 		/* get the vbios after the asic_funcs are set up */
2081 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2082 			r = amdgpu_device_parse_gpu_info_fw(adev);
2083 			if (r)
2084 				return r;
2085 
2086 			/* Read BIOS */
2087 			if (!amdgpu_get_bios(adev))
2088 				return -EINVAL;
2089 
2090 			r = amdgpu_atombios_init(adev);
2091 			if (r) {
2092 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2093 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2094 				return r;
2095 			}
2096 
2097 			/* get pf2vf msg info at the earliest possible time */
2098 			if (amdgpu_sriov_vf(adev))
2099 				amdgpu_virt_init_data_exchange(adev);
2100 
2101 		}
2102 	}
2103 
2104 	adev->cg_flags &= amdgpu_cg_mask;
2105 	adev->pg_flags &= amdgpu_pg_mask;
2106 
2107 	return 0;
2108 }
2109 
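/*
 * Phase 1 of hardware init: bring up only the blocks everything else depends
 * on - COMMON, IH and, on SR-IOV, PSP - so that register access, interrupts
 * and (for virtual functions) firmware loading are available before the
 * remaining blocks are initialized in phase 2.
 */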
2110 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2111 {
2112 	int i, r;
2113 
2114 	for (i = 0; i < adev->num_ip_blocks; i++) {
2115 		if (!adev->ip_blocks[i].status.sw)
2116 			continue;
2117 		if (adev->ip_blocks[i].status.hw)
2118 			continue;
2119 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2120 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2121 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2122 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2123 			if (r) {
2124 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2125 					  adev->ip_blocks[i].version->funcs->name, r);
2126 				return r;
2127 			}
2128 			adev->ip_blocks[i].status.hw = true;
2129 		}
2130 	}
2131 
2132 	return 0;
2133 }
2134 
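/*
 * Phase 2 of hardware init: initialize every remaining sw-initialized block
 * that phase 1 and the firmware loading step have not already brought up.
 */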
2135 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2136 {
2137 	int i, r;
2138 
2139 	for (i = 0; i < adev->num_ip_blocks; i++) {
2140 		if (!adev->ip_blocks[i].status.sw)
2141 			continue;
2142 		if (adev->ip_blocks[i].status.hw)
2143 			continue;
2144 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2145 		if (r) {
2146 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2147 				  adev->ip_blocks[i].version->funcs->name, r);
2148 			return r;
2149 		}
2150 		adev->ip_blocks[i].status.hw = true;
2151 	}
2152 
2153 	return 0;
2154 }
2155 
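/*
 * Bring up the PSP block, which handles microcode loading on VEGA10 and
 * newer ASICs, before the SMU firmware is loaded.  On a reset or resume the
 * block is resumed instead of re-running hw_init.
 */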
2156 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2157 {
2158 	int r = 0;
2159 	int i;
2160 	uint32_t smu_version;
2161 
2162 	if (adev->asic_type >= CHIP_VEGA10) {
2163 		for (i = 0; i < adev->num_ip_blocks; i++) {
2164 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2165 				continue;
2166 
2167 			if (!adev->ip_blocks[i].status.sw)
2168 				continue;
2169 
2170 			/* no need to do the fw loading again if already done */
2171 			if (adev->ip_blocks[i].status.hw == true)
2172 				break;
2173 
2174 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2175 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2176 				if (r) {
2177 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2178 							  adev->ip_blocks[i].version->funcs->name, r);
2179 					return r;
2180 				}
2181 			} else {
2182 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2183 				if (r) {
2184 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2185 							  adev->ip_blocks[i].version->funcs->name, r);
2186 					return r;
2187 				}
2188 			}
2189 
2190 			adev->ip_blocks[i].status.hw = true;
2191 			break;
2192 		}
2193 	}
2194 
2195 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2196 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2197 
2198 	return r;
2199 }
2200 
2201 /**
2202  * amdgpu_device_ip_init - run init for hardware IPs
2203  *
2204  * @adev: amdgpu_device pointer
2205  *
2206  * Main initialization pass for hardware IPs.  The list of all the hardware
2207  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2208  * are run.  sw_init initializes the software state associated with each IP
2209  * and hw_init initializes the hardware associated with each IP.
2210  * Returns 0 on success, negative error code on failure.
2211  */
2212 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2213 {
2214 	int i, r;
2215 
2216 	r = amdgpu_ras_init(adev);
2217 	if (r)
2218 		return r;
2219 
2220 	for (i = 0; i < adev->num_ip_blocks; i++) {
2221 		if (!adev->ip_blocks[i].status.valid)
2222 			continue;
2223 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2224 		if (r) {
2225 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2226 				  adev->ip_blocks[i].version->funcs->name, r);
2227 			goto init_failed;
2228 		}
2229 		adev->ip_blocks[i].status.sw = true;
2230 
2231 		/* need to do gmc hw init early so we can allocate gpu mem */
2232 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2233 			r = amdgpu_device_vram_scratch_init(adev);
2234 			if (r) {
2235 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2236 				goto init_failed;
2237 			}
2238 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2239 			if (r) {
2240 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2241 				goto init_failed;
2242 			}
2243 			r = amdgpu_device_wb_init(adev);
2244 			if (r) {
2245 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2246 				goto init_failed;
2247 			}
2248 			adev->ip_blocks[i].status.hw = true;
2249 
2250 			/* right after GMC hw init, we create CSA */
2251 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2252 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2253 								AMDGPU_GEM_DOMAIN_VRAM,
2254 								AMDGPU_CSA_SIZE);
2255 				if (r) {
2256 					DRM_ERROR("allocate CSA failed %d\n", r);
2257 					goto init_failed;
2258 				}
2259 			}
2260 		}
2261 	}
2262 
2263 	if (amdgpu_sriov_vf(adev))
2264 		amdgpu_virt_init_data_exchange(adev);
2265 
2266 	r = amdgpu_ib_pool_init(adev);
2267 	if (r) {
2268 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2269 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2270 		goto init_failed;
2271 	}
2272 
2273 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2274 	if (r)
2275 		goto init_failed;
2276 
2277 	r = amdgpu_device_ip_hw_init_phase1(adev);
2278 	if (r)
2279 		goto init_failed;
2280 
2281 	r = amdgpu_device_fw_loading(adev);
2282 	if (r)
2283 		goto init_failed;
2284 
2285 	r = amdgpu_device_ip_hw_init_phase2(adev);
2286 	if (r)
2287 		goto init_failed;
2288 
2289 	/*
2290 	 * Retired pages will be loaded from eeprom and reserved here.
2291 	 * This must be called after amdgpu_device_ip_hw_init_phase2, since
2292 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2293 	 * functional for I2C communication, which is only true at this point.
2294 	 *
2295 	 * amdgpu_ras_recovery_init may fail, but the upper level only cares
2296 	 * about failures caused by a bad GPU state and stops the amdgpu init
2297 	 * process accordingly.  For other failure cases it still releases all
2298 	 * the resources and prints an error message rather than returning a
2299 	 * negative value to the upper level.
2300 	 *
2301 	 * Note: theoretically, this should be called before all VRAM
2302 	 * allocations to protect retired pages from being reused.
2303 	 */
2304 	r = amdgpu_ras_recovery_init(adev);
2305 	if (r)
2306 		goto init_failed;
2307 
2308 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2309 		amdgpu_xgmi_add_device(adev);
2310 
2311 	/* Don't init kfd if the whole hive needs to be reset during init */
2312 	if (!adev->gmc.xgmi.pending_reset)
2313 		amdgpu_amdkfd_device_init(adev);
2314 
2315 	amdgpu_fru_get_product_info(adev);
2316 
2317 init_failed:
2318 	if (amdgpu_sriov_vf(adev))
2319 		amdgpu_virt_release_full_gpu(adev, true);
2320 
2321 	return r;
2322 }
2323 
2324 /**
2325  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2326  *
2327  * @adev: amdgpu_device pointer
2328  *
2329  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2330  * this function before a GPU reset.  If the value is retained after a
2331  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2332  */
2333 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2334 {
2335 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2336 }
2337 
2338 /**
2339  * amdgpu_device_check_vram_lost - check if vram is valid
2340  *
2341  * @adev: amdgpu_device pointer
2342  *
2343  * Checks the reset magic value written to the gart pointer in VRAM.
2344  * The driver calls this after a GPU reset to see if the contents of
2345  * VRAM have been lost or not.
2346  * returns true if vram is lost, false if not.
2347  */
2348 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2349 {
2350 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2351 			AMDGPU_RESET_MAGIC_NUM))
2352 		return true;
2353 
2354 	if (!amdgpu_in_reset(adev))
2355 		return false;
2356 
2357 	/*
2358 	 * For all ASICs with baco/mode1 reset, the VRAM is
2359 	 * always assumed to be lost.
2360 	 */
2361 	switch (amdgpu_asic_reset_method(adev)) {
2362 	case AMD_RESET_METHOD_BACO:
2363 	case AMD_RESET_METHOD_MODE1:
2364 		return true;
2365 	default:
2366 		return false;
2367 	}
2368 }
2369 
2370 /**
2371  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2372  *
2373  * @adev: amdgpu_device pointer
2374  * @state: clockgating state (gate or ungate)
2375  *
2376  * The list of all the hardware IPs that make up the asic is walked and the
2377  * set_clockgating_state callbacks are run.
2378  * The late-init pass uses this to enable clockgating for hardware IPs;
2379  * the fini and suspend passes use it to disable clockgating again.
2380  * Returns 0 on success, negative error code on failure.
2381  */
2382 
2383 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2384 			       enum amd_clockgating_state state)
2385 {
2386 	int i, j, r;
2387 
2388 	if (amdgpu_emu_mode == 1)
2389 		return 0;
2390 
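	/*
	 * Walk the IP blocks in list order when gating and in reverse order
	 * when ungating, mirroring the init/fini ordering.
	 */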
2391 	for (j = 0; j < adev->num_ip_blocks; j++) {
2392 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2393 		if (!adev->ip_blocks[i].status.late_initialized)
2394 			continue;
2395 		/* skip CG for GFX on S0ix */
2396 		if (adev->in_s0ix &&
2397 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2398 			continue;
2399 		/* skip CG for VCE/UVD, it's handled specially */
2400 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2401 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2402 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2403 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2404 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2405 			/* enable clockgating to save power */
2406 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2407 										     state);
2408 			if (r) {
2409 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2410 					  adev->ip_blocks[i].version->funcs->name, r);
2411 				return r;
2412 			}
2413 		}
2414 	}
2415 
2416 	return 0;
2417 }
2418 
2419 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2420 			       enum amd_powergating_state state)
2421 {
2422 	int i, j, r;
2423 
2424 	if (amdgpu_emu_mode == 1)
2425 		return 0;
2426 
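	/*
	 * As with clockgating, powergate in list order and ungate in reverse
	 * order.
	 */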
2427 	for (j = 0; j < adev->num_ip_blocks; j++) {
2428 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2429 		if (!adev->ip_blocks[i].status.late_initialized)
2430 			continue;
2431 		/* skip PG for GFX on S0ix */
2432 		if (adev->in_s0ix &&
2433 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2434 			continue;
2435 		/* skip PG for VCE/UVD, it's handled specially */
2436 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2437 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2438 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2439 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2440 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2441 			/* enable powergating to save power */
2442 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2443 											state);
2444 			if (r) {
2445 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2446 					  adev->ip_blocks[i].version->funcs->name, r);
2447 				return r;
2448 			}
2449 		}
2450 	}
2451 	return 0;
2452 }
2453 
2454 static int amdgpu_device_enable_mgpu_fan_boost(void)
2455 {
2456 	struct amdgpu_gpu_instance *gpu_ins;
2457 	struct amdgpu_device *adev;
2458 	int i, ret = 0;
2459 
2460 	mutex_lock(&mgpu_info.mutex);
2461 
2462 	/*
2463 	 * MGPU fan boost feature should be enabled
2464 	 * only when there are two or more dGPUs in
2465 	 * the system
2466 	 */
2467 	if (mgpu_info.num_dgpu < 2)
2468 		goto out;
2469 
2470 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2471 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2472 		adev = gpu_ins->adev;
2473 		if (!(adev->flags & AMD_IS_APU) &&
2474 		    !gpu_ins->mgpu_fan_enabled) {
2475 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2476 			if (ret)
2477 				break;
2478 
2479 			gpu_ins->mgpu_fan_enabled = 1;
2480 		}
2481 	}
2482 
2483 out:
2484 	mutex_unlock(&mgpu_info.mutex);
2485 
2486 	return ret;
2487 }
2488 
2489 /**
2490  * amdgpu_device_ip_late_init - run late init for hardware IPs
2491  *
2492  * @adev: amdgpu_device pointer
2493  *
2494  * Late initialization pass for hardware IPs.  The list of all the hardware
2495  * IPs that make up the asic is walked and the late_init callbacks are run.
2496  * late_init covers any special initialization that an IP requires
2497  * after all of the IPs have been initialized or something that needs to happen
2498  * late in the init process.
2499  * Returns 0 on success, negative error code on failure.
2500  */
2501 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2502 {
2503 	struct amdgpu_gpu_instance *gpu_instance;
2504 	int i = 0, r;
2505 
2506 	for (i = 0; i < adev->num_ip_blocks; i++) {
2507 		if (!adev->ip_blocks[i].status.hw)
2508 			continue;
2509 		if (adev->ip_blocks[i].version->funcs->late_init) {
2510 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2511 			if (r) {
2512 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2513 					  adev->ip_blocks[i].version->funcs->name, r);
2514 				return r;
2515 			}
2516 		}
2517 		adev->ip_blocks[i].status.late_initialized = true;
2518 	}
2519 
2520 	amdgpu_ras_set_error_query_ready(adev, true);
2521 
2522 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2523 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2524 
2525 	amdgpu_device_fill_reset_magic(adev);
2526 
2527 	r = amdgpu_device_enable_mgpu_fan_boost();
2528 	if (r)
2529 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2530 
2531 	/* For XGMI + passthrough configuration on arcturus, enable light SBR */
2532 	if (adev->asic_type == CHIP_ARCTURUS &&
2533 	    amdgpu_passthrough(adev) &&
2534 	    adev->gmc.xgmi.num_physical_nodes > 1)
2535 		smu_set_light_sbr(&adev->smu, true);
2536 
2537 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2538 		mutex_lock(&mgpu_info.mutex);
2539 
2540 		/*
2541 		 * Reset device p-state to low as this was booted with high.
2542 		 *
2543 		 * This should be performed only after all devices from the same
2544 		 * hive get initialized.
2545 		 *
2546 		 * However, the number of devices in a hive is not known in
2547 		 * advance; it is counted one by one as the devices initialize.
2548 		 *
2549 		 * So we wait until all XGMI interlinked devices have initialized.
2550 		 * This may add some delay as those devices may come from
2551 		 * different hives, but that should be OK.
2552 		 */
2553 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2554 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2555 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2556 				if (gpu_instance->adev->flags & AMD_IS_APU)
2557 					continue;
2558 
2559 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2560 						AMDGPU_XGMI_PSTATE_MIN);
2561 				if (r) {
2562 					DRM_ERROR("pstate setting failed (%d).\n", r);
2563 					break;
2564 				}
2565 			}
2566 		}
2567 
2568 		mutex_unlock(&mgpu_info.mutex);
2569 	}
2570 
2571 	return 0;
2572 }
2573 
2574 /**
2575  * amdgpu_device_ip_fini - run fini for hardware IPs
2576  *
2577  * @adev: amdgpu_device pointer
2578  *
2579  * Main teardown pass for hardware IPs.  The list of all the hardware
2580  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2581  * are run.  hw_fini tears down the hardware associated with each IP
2582  * and sw_fini tears down any software state associated with each IP.
2583  * Returns 0 on success, negative error code on failure.
2584  */
2585 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2586 {
2587 	int i, r;
2588 
2589 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2590 		amdgpu_virt_release_ras_err_handler_data(adev);
2591 
2592 	amdgpu_ras_pre_fini(adev);
2593 
2594 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2595 		amdgpu_xgmi_remove_device(adev);
2596 
2597 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2598 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2599 
2600 	amdgpu_amdkfd_device_fini(adev);
2601 
2602 	/* need to disable SMC first */
2603 	for (i = 0; i < adev->num_ip_blocks; i++) {
2604 		if (!adev->ip_blocks[i].status.hw)
2605 			continue;
2606 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2607 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2608 			/* XXX handle errors */
2609 			if (r) {
2610 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2611 					  adev->ip_blocks[i].version->funcs->name, r);
2612 			}
2613 			adev->ip_blocks[i].status.hw = false;
2614 			break;
2615 		}
2616 	}
2617 
2618 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2619 		if (!adev->ip_blocks[i].status.hw)
2620 			continue;
2621 
2622 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2623 		/* XXX handle errors */
2624 		if (r) {
2625 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2626 				  adev->ip_blocks[i].version->funcs->name, r);
2627 		}
2628 
2629 		adev->ip_blocks[i].status.hw = false;
2630 	}
2631 
2632 
2633 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2634 		if (!adev->ip_blocks[i].status.sw)
2635 			continue;
2636 
2637 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2638 			amdgpu_ucode_free_bo(adev);
2639 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2640 			amdgpu_device_wb_fini(adev);
2641 			amdgpu_device_vram_scratch_fini(adev);
2642 			amdgpu_ib_pool_fini(adev);
2643 		}
2644 
2645 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2646 		/* XXX handle errors */
2647 		if (r) {
2648 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2649 				  adev->ip_blocks[i].version->funcs->name, r);
2650 		}
2651 		adev->ip_blocks[i].status.sw = false;
2652 		adev->ip_blocks[i].status.valid = false;
2653 	}
2654 
2655 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2656 		if (!adev->ip_blocks[i].status.late_initialized)
2657 			continue;
2658 		if (adev->ip_blocks[i].version->funcs->late_fini)
2659 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2660 		adev->ip_blocks[i].status.late_initialized = false;
2661 	}
2662 
2663 	amdgpu_ras_fini(adev);
2664 
2665 	if (amdgpu_sriov_vf(adev))
2666 		if (amdgpu_virt_release_full_gpu(adev, false))
2667 			DRM_ERROR("failed to release exclusive mode on fini\n");
2668 
2669 	return 0;
2670 }
2671 
2672 /**
2673  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2674  *
2675  * @work: work_struct.
2676  */
2677 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2678 {
2679 	struct amdgpu_device *adev =
2680 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2681 	int r;
2682 
2683 	r = amdgpu_ib_ring_tests(adev);
2684 	if (r)
2685 		DRM_ERROR("ib ring test failed (%d).\n", r);
2686 }
2687 
2688 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2689 {
2690 	struct amdgpu_device *adev =
2691 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2692 
2693 	mutex_lock(&adev->gfx.gfx_off_mutex);
2694 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2695 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2696 			adev->gfx.gfx_off_state = true;
2697 	}
2698 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2699 }
2700 
2701 /**
2702  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2703  *
2704  * @adev: amdgpu_device pointer
2705  *
2706  * First suspend pass for hardware IPs.  Clockgating and powergating are
2707  * disabled and the list of all the hardware IPs that make up the asic is
2708  * walked, running the suspend callbacks for the display (DCE) blocks only.
2709  * The remaining blocks are suspended in phase 2.
2710  * Returns 0 on success, negative error code on failure.
2711  */
2712 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2713 {
2714 	int i, r;
2715 
2716 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2717 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2718 
2719 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2720 		if (!adev->ip_blocks[i].status.valid)
2721 			continue;
2722 
2723 		/* displays are handled separately */
2724 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2725 			continue;
2726 
2728 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2729 		/* XXX handle errors */
2730 		if (r) {
2731 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2732 				  adev->ip_blocks[i].version->funcs->name, r);
2733 			return r;
2734 		}
2735 
2736 		adev->ip_blocks[i].status.hw = false;
2737 	}
2738 
2739 	return 0;
2740 }
2741 
2742 /**
2743  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2744  *
2745  * @adev: amdgpu_device pointer
2746  *
2747  * Second suspend pass for hardware IPs.  The list of all the hardware
2748  * IPs that make up the asic is walked and the suspend callbacks are run
2749  * for every block except the displays (handled in phase 1), putting the
2750  * hardware and software state of each IP into a state suitable for suspend.
2751  * Returns 0 on success, negative error code on failure.
2752  */
2753 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2754 {
2755 	int i, r;
2756 
2757 	if (adev->in_s0ix)
2758 		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
2759 
2760 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2761 		if (!adev->ip_blocks[i].status.valid)
2762 			continue;
2763 		/* displays are handled in phase1 */
2764 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2765 			continue;
2766 		/* PSP lost connection when err_event_athub occurs */
2767 		if (amdgpu_ras_intr_triggered() &&
2768 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2769 			adev->ip_blocks[i].status.hw = false;
2770 			continue;
2771 		}
2772 
2773 		/* skip unnecessary suspend if we have not initialized them yet */
2774 		if (adev->gmc.xgmi.pending_reset &&
2775 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2776 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2777 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2778 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2779 			adev->ip_blocks[i].status.hw = false;
2780 			continue;
2781 		}
2782 
2783 		/* skip suspend of gfx and psp for S0ix
2784 		 * gfx is in the gfxoff state, so on resume it will exit gfxoff just
2785 		 * like at runtime. PSP is also part of the always-on hardware,
2786 		 * so there is no need to suspend it.
2787 		 */
2788 		if (adev->in_s0ix &&
2789 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
2790 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
2791 			continue;
2792 
2794 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2795 		/* XXX handle errors */
2796 		if (r) {
2797 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2798 				  adev->ip_blocks[i].version->funcs->name, r);
2799 		}
2800 		adev->ip_blocks[i].status.hw = false;
2801 		/* handle putting the SMC in the appropriate state */
2802 		if (!amdgpu_sriov_vf(adev)) {
2803 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2804 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2805 				if (r) {
2806 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2807 							adev->mp1_state, r);
2808 					return r;
2809 				}
2810 			}
2811 		}
2812 	}
2813 
2814 	return 0;
2815 }
2816 
2817 /**
2818  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2819  *
2820  * @adev: amdgpu_device pointer
2821  *
2822  * Main suspend function for hardware IPs.  The list of all the hardware
2823  * IPs that make up the asic is walked, clockgating is disabled and the
2824  * suspend callbacks are run.  suspend puts the hardware and software state
2825  * in each IP into a state suitable for suspend.
2826  * Returns 0 on success, negative error code on failure.
2827  */
2828 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2829 {
2830 	int r;
2831 
2832 	if (amdgpu_sriov_vf(adev)) {
2833 		amdgpu_virt_fini_data_exchange(adev);
2834 		amdgpu_virt_request_full_gpu(adev, false);
2835 	}
2836 
2837 	r = amdgpu_device_ip_suspend_phase1(adev);
2838 	if (r)
2839 		return r;
2840 	r = amdgpu_device_ip_suspend_phase2(adev);
2841 
2842 	if (amdgpu_sriov_vf(adev))
2843 		amdgpu_virt_release_full_gpu(adev, false);
2844 
2845 	return r;
2846 }
2847 
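/*
 * After an SR-IOV reset the hardware is re-initialized in a fixed order -
 * GMC, COMMON, PSP, IH - rather than in the usual IP list order.
 */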
2848 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2849 {
2850 	int i, r;
2851 
2852 	static enum amd_ip_block_type ip_order[] = {
2853 		AMD_IP_BLOCK_TYPE_GMC,
2854 		AMD_IP_BLOCK_TYPE_COMMON,
2855 		AMD_IP_BLOCK_TYPE_PSP,
2856 		AMD_IP_BLOCK_TYPE_IH,
2857 	};
2858 
2859 	for (i = 0; i < adev->num_ip_blocks; i++) {
2860 		int j;
2861 		struct amdgpu_ip_block *block;
2862 
2863 		block = &adev->ip_blocks[i];
2864 		block->status.hw = false;
2865 
2866 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2867 
2868 			if (block->version->type != ip_order[j] ||
2869 				!block->status.valid)
2870 				continue;
2871 
2872 			r = block->version->funcs->hw_init(adev);
2873 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2874 			if (r)
2875 				return r;
2876 			block->status.hw = true;
2877 		}
2878 	}
2879 
2880 	return 0;
2881 }
2882 
2883 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2884 {
2885 	int i, r;
2886 
2887 	static enum amd_ip_block_type ip_order[] = {
2888 		AMD_IP_BLOCK_TYPE_SMC,
2889 		AMD_IP_BLOCK_TYPE_DCE,
2890 		AMD_IP_BLOCK_TYPE_GFX,
2891 		AMD_IP_BLOCK_TYPE_SDMA,
2892 		AMD_IP_BLOCK_TYPE_UVD,
2893 		AMD_IP_BLOCK_TYPE_VCE,
2894 		AMD_IP_BLOCK_TYPE_VCN
2895 	};
2896 
2897 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2898 		int j;
2899 		struct amdgpu_ip_block *block;
2900 
2901 		for (j = 0; j < adev->num_ip_blocks; j++) {
2902 			block = &adev->ip_blocks[j];
2903 
2904 			if (block->version->type != ip_order[i] ||
2905 				!block->status.valid ||
2906 				block->status.hw)
2907 				continue;
2908 
2909 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2910 				r = block->version->funcs->resume(adev);
2911 			else
2912 				r = block->version->funcs->hw_init(adev);
2913 
2914 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2915 			if (r)
2916 				return r;
2917 			block->status.hw = true;
2918 		}
2919 	}
2920 
2921 	return 0;
2922 }
2923 
2924 /**
2925  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2926  *
2927  * @adev: amdgpu_device pointer
2928  *
2929  * First resume function for hardware IPs.  The list of all the hardware
2930  * IPs that make up the asic is walked and the resume callbacks are run for
2931  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2932  * after a suspend and updates the software state as necessary.  This
2933  * function is also used for restoring the GPU after a GPU reset.
2934  * Returns 0 on success, negative error code on failure.
2935  */
2936 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2937 {
2938 	int i, r;
2939 
2940 	for (i = 0; i < adev->num_ip_blocks; i++) {
2941 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2942 			continue;
2943 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2944 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2945 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2946 
2947 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2948 			if (r) {
2949 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2950 					  adev->ip_blocks[i].version->funcs->name, r);
2951 				return r;
2952 			}
2953 			adev->ip_blocks[i].status.hw = true;
2954 		}
2955 	}
2956 
2957 	return 0;
2958 }
2959 
2960 /**
2961  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2962  *
2963  * @adev: amdgpu_device pointer
2964  *
2965  * Second resume function for hardware IPs.  The list of all the hardware
2966  * IPs that make up the asic is walked and the resume callbacks are run for
2967  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2968  * functional state after a suspend and updates the software state as
2969  * necessary.  This function is also used for restoring the GPU after a GPU
2970  * reset.
2971  * Returns 0 on success, negative error code on failure.
2972  */
2973 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2974 {
2975 	int i, r;
2976 
2977 	for (i = 0; i < adev->num_ip_blocks; i++) {
2978 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2979 			continue;
2980 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2981 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2982 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2983 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2984 			continue;
2985 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2986 		if (r) {
2987 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2988 				  adev->ip_blocks[i].version->funcs->name, r);
2989 			return r;
2990 		}
2991 		adev->ip_blocks[i].status.hw = true;
2992 	}
2993 
2994 	return 0;
2995 }
2996 
2997 /**
2998  * amdgpu_device_ip_resume - run resume for hardware IPs
2999  *
3000  * @adev: amdgpu_device pointer
3001  *
3002  * Main resume function for hardware IPs.  The hardware IPs
3003  * are split into two resume functions because they are
3004  * also used in recovering from a GPU reset and some additional
3005  * steps need to be taken between them.  In this case (S3/S4) they are
3006  * run sequentially.
3007  * Returns 0 on success, negative error code on failure.
3008  */
3009 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3010 {
3011 	int r;
3012 
3013 	r = amdgpu_device_ip_resume_phase1(adev);
3014 	if (r)
3015 		return r;
3016 
3017 	r = amdgpu_device_fw_loading(adev);
3018 	if (r)
3019 		return r;
3020 
3021 	r = amdgpu_device_ip_resume_phase2(adev);
3022 
3023 	return r;
3024 }
3025 
3026 /**
3027  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3028  *
3029  * @adev: amdgpu_device pointer
3030  *
3031  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3032  */
3033 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3034 {
3035 	if (amdgpu_sriov_vf(adev)) {
3036 		if (adev->is_atom_fw) {
3037 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3038 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3039 		} else {
3040 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3041 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3042 		}
3043 
3044 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3045 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3046 	}
3047 }
3048 
3049 /**
3050  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3051  *
3052  * @asic_type: AMD asic type
3053  *
3054  * Check if there is DC (new modesetting infrastructure) support for an asic.
3055  * returns true if DC has support, false if not.
3056  */
3057 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3058 {
3059 	switch (asic_type) {
3060 #if defined(CONFIG_DRM_AMD_DC)
3061 #if defined(CONFIG_DRM_AMD_DC_SI)
3062 	case CHIP_TAHITI:
3063 	case CHIP_PITCAIRN:
3064 	case CHIP_VERDE:
3065 	case CHIP_OLAND:
3066 #endif
3067 	case CHIP_BONAIRE:
3068 	case CHIP_KAVERI:
3069 	case CHIP_KABINI:
3070 	case CHIP_MULLINS:
3071 		/*
3072 		 * We have systems in the wild with these ASICs that require
3073 		 * LVDS and VGA support which is not supported with DC.
3074 		 *
3075 		 * Fallback to the non-DC driver here by default so as not to
3076 		 * cause regressions.
3077 		 */
3078 		return amdgpu_dc > 0;
3079 	case CHIP_HAWAII:
3080 	case CHIP_CARRIZO:
3081 	case CHIP_STONEY:
3082 	case CHIP_POLARIS10:
3083 	case CHIP_POLARIS11:
3084 	case CHIP_POLARIS12:
3085 	case CHIP_VEGAM:
3086 	case CHIP_TONGA:
3087 	case CHIP_FIJI:
3088 	case CHIP_VEGA10:
3089 	case CHIP_VEGA12:
3090 	case CHIP_VEGA20:
3091 #if defined(CONFIG_DRM_AMD_DC_DCN)
3092 	case CHIP_RAVEN:
3093 	case CHIP_NAVI10:
3094 	case CHIP_NAVI14:
3095 	case CHIP_NAVI12:
3096 	case CHIP_RENOIR:
3097 	case CHIP_SIENNA_CICHLID:
3098 	case CHIP_NAVY_FLOUNDER:
3099 	case CHIP_DIMGREY_CAVEFISH:
3100 	case CHIP_VANGOGH:
3101 #endif
3102 		return amdgpu_dc != 0;
3103 #endif
3104 	default:
3105 		if (amdgpu_dc > 0)
3106 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3107 					 "but isn't supported by the ASIC, ignoring\n");
3108 		return false;
3109 	}
3110 }
3111 
3112 /**
3113  * amdgpu_device_has_dc_support - check if dc is supported
3114  *
3115  * @adev: amdgpu_device pointer
3116  *
3117  * Returns true for supported, false for not supported
3118  */
3119 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3120 {
3121 	if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
3122 		return false;
3123 
3124 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3125 }
3126 
3127 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3128 {
3129 	struct amdgpu_device *adev =
3130 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3131 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3132 
3133 	/* It's a bug to not have a hive within this function */
3134 	if (WARN_ON(!hive))
3135 		return;
3136 
3137 	/*
3138 	 * Use task barrier to synchronize all xgmi reset works across the
3139 	 * hive. task_barrier_enter and task_barrier_exit will block
3140 	 * until all the threads running the xgmi reset works reach
3141 	 * those points. task_barrier_full will do both blocks.
3142 	 */
3143 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3144 
3145 		task_barrier_enter(&hive->tb);
3146 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3147 
3148 		if (adev->asic_reset_res)
3149 			goto fail;
3150 
3151 		task_barrier_exit(&hive->tb);
3152 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3153 
3154 		if (adev->asic_reset_res)
3155 			goto fail;
3156 
3157 		if (adev->mmhub.ras_funcs &&
3158 		    adev->mmhub.ras_funcs->reset_ras_error_count)
3159 			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
3160 	} else {
3161 
3162 		task_barrier_full(&hive->tb);
3163 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3164 	}
3165 
3166 fail:
3167 	if (adev->asic_reset_res)
3168 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3169 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3170 	amdgpu_put_xgmi_hive(hive);
3171 }
3172 
3173 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3174 {
3175 	char *input = amdgpu_lockup_timeout;
3176 	char *timeout_setting = NULL;
3177 	int index = 0;
3178 	long timeout;
3179 	int ret = 0;
3180 
3181 	/*
3182 	 * By default the timeout for non-compute jobs is 10000 ms
3183 	 * and 60000 ms for compute jobs.
3184 	 * In SR-IOV or passthrough mode, the timeout for compute
3185 	 * jobs is 60000 ms by default.
3186 	 */
3187 	adev->gfx_timeout = msecs_to_jiffies(10000);
3188 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3189 	if (amdgpu_sriov_vf(adev))
3190 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3191 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3192 	else
3193 		adev->compute_timeout =  msecs_to_jiffies(60000);
3194 
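	/*
	 * amdgpu.lockup_timeout is a comma-separated list of per-engine
	 * timeouts in ms, applied in the order gfx, compute, sdma, video.
	 * 0 keeps the default and a negative value disables the timeout.
	 * A purely illustrative example:
	 *   amdgpu.lockup_timeout=10000,60000,10000,10000
	 */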
3195 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3196 		while ((timeout_setting = strsep(&input, ",")) &&
3197 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3198 			ret = kstrtol(timeout_setting, 0, &timeout);
3199 			if (ret)
3200 				return ret;
3201 
3202 			if (timeout == 0) {
3203 				index++;
3204 				continue;
3205 			} else if (timeout < 0) {
3206 				timeout = MAX_SCHEDULE_TIMEOUT;
3207 			} else {
3208 				timeout = msecs_to_jiffies(timeout);
3209 			}
3210 
3211 			switch (index++) {
3212 			case 0:
3213 				adev->gfx_timeout = timeout;
3214 				break;
3215 			case 1:
3216 				adev->compute_timeout = timeout;
3217 				break;
3218 			case 2:
3219 				adev->sdma_timeout = timeout;
3220 				break;
3221 			case 3:
3222 				adev->video_timeout = timeout;
3223 				break;
3224 			default:
3225 				break;
3226 			}
3227 		}
3228 		/*
3229 		 * If only one value was specified, it applies
3230 		 * to all non-compute jobs.
3231 		 */
3232 		if (index == 1) {
3233 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3234 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3235 				adev->compute_timeout = adev->gfx_timeout;
3236 		}
3237 	}
3238 
3239 	return ret;
3240 }
3241 
3242 static const struct attribute *amdgpu_dev_attributes[] = {
3243 	&dev_attr_product_name.attr,
3244 	&dev_attr_product_number.attr,
3245 	&dev_attr_serial_number.attr,
3246 	&dev_attr_pcie_replay_count.attr,
3247 	NULL
3248 };
3249 
3250 
3251 /**
3252  * amdgpu_device_init - initialize the driver
3253  *
3254  * @adev: amdgpu_device pointer
3255  * @flags: driver flags
3256  *
3257  * Initializes the driver info and hw (all asics).
3258  * Returns 0 for success or an error on failure.
3259  * Called at driver startup.
3260  */
3261 int amdgpu_device_init(struct amdgpu_device *adev,
3262 		       uint32_t flags)
3263 {
3264 	struct drm_device *ddev = adev_to_drm(adev);
3265 	struct pci_dev *pdev = adev->pdev;
3266 	int r, i;
3267 	bool px = false;
3268 	u32 max_MBps;
3269 
3270 	adev->shutdown = false;
3271 	adev->flags = flags;
3272 
3273 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3274 		adev->asic_type = amdgpu_force_asic_type;
3275 	else
3276 		adev->asic_type = flags & AMD_ASIC_MASK;
3277 
3278 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3279 	if (amdgpu_emu_mode == 1)
3280 		adev->usec_timeout *= 10;
3281 	adev->gmc.gart_size = 512 * 1024 * 1024;
3282 	adev->accel_working = false;
3283 	adev->num_rings = 0;
3284 	adev->mman.buffer_funcs = NULL;
3285 	adev->mman.buffer_funcs_ring = NULL;
3286 	adev->vm_manager.vm_pte_funcs = NULL;
3287 	adev->vm_manager.vm_pte_num_scheds = 0;
3288 	adev->gmc.gmc_funcs = NULL;
3289 	adev->harvest_ip_mask = 0x0;
3290 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3291 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3292 
3293 	adev->smc_rreg = &amdgpu_invalid_rreg;
3294 	adev->smc_wreg = &amdgpu_invalid_wreg;
3295 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3296 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3297 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3298 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3299 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3300 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3301 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3302 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3303 	adev->didt_rreg = &amdgpu_invalid_rreg;
3304 	adev->didt_wreg = &amdgpu_invalid_wreg;
3305 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3306 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3307 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3308 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3309 
3310 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3311 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3312 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3313 
3314 	/* mutex initialization is all done here so we
3315 	 * can call these functions again without locking issues */
3316 	mutex_init(&adev->firmware.mutex);
3317 	mutex_init(&adev->pm.mutex);
3318 	mutex_init(&adev->gfx.gpu_clock_mutex);
3319 	mutex_init(&adev->srbm_mutex);
3320 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3321 	mutex_init(&adev->gfx.gfx_off_mutex);
3322 	mutex_init(&adev->grbm_idx_mutex);
3323 	mutex_init(&adev->mn_lock);
3324 	mutex_init(&adev->virt.vf_errors.lock);
3325 	hash_init(adev->mn_hash);
3326 	atomic_set(&adev->in_gpu_reset, 0);
3327 	init_rwsem(&adev->reset_sem);
3328 	mutex_init(&adev->psp.mutex);
3329 	mutex_init(&adev->notifier_lock);
3330 
3331 	r = amdgpu_device_check_arguments(adev);
3332 	if (r)
3333 		return r;
3334 
3335 	spin_lock_init(&adev->mmio_idx_lock);
3336 	spin_lock_init(&adev->smc_idx_lock);
3337 	spin_lock_init(&adev->pcie_idx_lock);
3338 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3339 	spin_lock_init(&adev->didt_idx_lock);
3340 	spin_lock_init(&adev->gc_cac_idx_lock);
3341 	spin_lock_init(&adev->se_cac_idx_lock);
3342 	spin_lock_init(&adev->audio_endpt_idx_lock);
3343 	spin_lock_init(&adev->mm_stats.lock);
3344 
3345 	INIT_LIST_HEAD(&adev->shadow_list);
3346 	mutex_init(&adev->shadow_list_lock);
3347 
3348 	INIT_LIST_HEAD(&adev->reset_list);
3349 
3350 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3351 			  amdgpu_device_delayed_init_work_handler);
3352 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3353 			  amdgpu_device_delay_enable_gfx_off);
3354 
3355 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3356 
3357 	adev->gfx.gfx_off_req_count = 1;
3358 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3359 
3360 	atomic_set(&adev->throttling_logging_enabled, 1);
3361 	/*
3362 	 * If throttling continues, logging will be performed every minute
3363 	 * to avoid log flooding. "-1" is subtracted since the thermal
3364 	 * throttling interrupt comes every second. Thus, the total logging
3365 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3366 	 * for the throttling interrupt) = 60 seconds.
3367 	 */
3368 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3369 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3370 
3371 	/* Registers mapping */
3372 	/* TODO: block userspace mapping of io register */
3373 	if (adev->asic_type >= CHIP_BONAIRE) {
3374 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3375 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3376 	} else {
3377 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3378 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3379 	}
3380 
3381 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3382 	if (adev->rmmio == NULL) {
3383 		return -ENOMEM;
3384 	}
3385 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3386 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3387 
3388 	/* enable PCIE atomic ops */
3389 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3390 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3391 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3392 	if (r) {
3393 		adev->have_atomics_support = false;
3394 		DRM_INFO("PCIe atomic ops are not supported\n");
3395 	} else {
3396 		adev->have_atomics_support = true;
3397 	}
3398 
3399 	amdgpu_device_get_pcie_info(adev);
3400 
3401 	if (amdgpu_mcbp)
3402 		DRM_INFO("MCBP is enabled\n");
3403 
3404 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3405 		adev->enable_mes = true;
3406 
3407 	/* detect hw virtualization here */
3408 	amdgpu_detect_virtualization(adev);
3409 
3410 	r = amdgpu_device_get_job_timeout_settings(adev);
3411 	if (r) {
3412 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3413 		goto failed_unmap;
3414 	}
3415 
3416 	/* early init functions */
3417 	r = amdgpu_device_ip_early_init(adev);
3418 	if (r)
3419 		goto failed_unmap;
3420 
3421 	/* doorbell bar mapping and doorbell index init */
3422 	amdgpu_device_doorbell_init(adev);
3423 
3424 	if (amdgpu_emu_mode == 1) {
3425 		/* post the asic on emulation mode */
3426 		emu_soc_asic_init(adev);
3427 		goto fence_driver_init;
3428 	}
3429 
3430 	amdgpu_reset_init(adev);
3431 
3432 	/* detect if we are with an SRIOV vbios */
3433 	amdgpu_device_detect_sriov_bios(adev);
3434 
3435 	/* check if we need to reset the asic
3436 	 *  E.g., driver was not cleanly unloaded previously, etc.
3437 	 */
3438 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3439 		if (adev->gmc.xgmi.num_physical_nodes) {
3440 			dev_info(adev->dev, "Pending hive reset.\n");
3441 			adev->gmc.xgmi.pending_reset = true;
3442 			/* Only need to init the blocks necessary for the SMU to handle the reset */
3443 			for (i = 0; i < adev->num_ip_blocks; i++) {
3444 				if (!adev->ip_blocks[i].status.valid)
3445 					continue;
3446 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3447 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3448 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3449 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3450 					DRM_DEBUG("IP %s disabled for hw_init.\n",
3451 						adev->ip_blocks[i].version->funcs->name);
3452 					adev->ip_blocks[i].status.hw = true;
3453 				}
3454 			}
3455 		} else {
3456 			r = amdgpu_asic_reset(adev);
3457 			if (r) {
3458 				dev_err(adev->dev, "asic reset on init failed\n");
3459 				goto failed;
3460 			}
3461 		}
3462 	}
3463 
3464 	pci_enable_pcie_error_reporting(adev->pdev);
3465 
3466 	/* Post card if necessary */
3467 	if (amdgpu_device_need_post(adev)) {
3468 		if (!adev->bios) {
3469 			dev_err(adev->dev, "no vBIOS found\n");
3470 			r = -EINVAL;
3471 			goto failed;
3472 		}
3473 		DRM_INFO("GPU posting now...\n");
3474 		r = amdgpu_device_asic_init(adev);
3475 		if (r) {
3476 			dev_err(adev->dev, "gpu post error!\n");
3477 			goto failed;
3478 		}
3479 	}
3480 
3481 	if (adev->is_atom_fw) {
3482 		/* Initialize clocks */
3483 		r = amdgpu_atomfirmware_get_clock_info(adev);
3484 		if (r) {
3485 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3486 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3487 			goto failed;
3488 		}
3489 	} else {
3490 		/* Initialize clocks */
3491 		r = amdgpu_atombios_get_clock_info(adev);
3492 		if (r) {
3493 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3494 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3495 			goto failed;
3496 		}
3497 		/* init i2c buses */
3498 		if (!amdgpu_device_has_dc_support(adev))
3499 			amdgpu_atombios_i2c_init(adev);
3500 	}
3501 
3502 fence_driver_init:
3503 	/* Fence driver */
3504 	r = amdgpu_fence_driver_init(adev);
3505 	if (r) {
3506 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3507 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3508 		goto failed;
3509 	}
3510 
3511 	/* init the mode config */
3512 	drm_mode_config_init(adev_to_drm(adev));
3513 
3514 	r = amdgpu_device_ip_init(adev);
3515 	if (r) {
3516 		/* failed in exclusive mode due to timeout */
3517 		if (amdgpu_sriov_vf(adev) &&
3518 		    !amdgpu_sriov_runtime(adev) &&
3519 		    amdgpu_virt_mmio_blocked(adev) &&
3520 		    !amdgpu_virt_wait_reset(adev)) {
3521 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3522 			/* Don't send request since VF is inactive. */
3523 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3524 			adev->virt.ops = NULL;
3525 			r = -EAGAIN;
3526 			goto release_ras_con;
3527 		}
3528 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3529 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3530 		goto release_ras_con;
3531 	}
3532 
3533 	dev_info(adev->dev,
3534 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3535 			adev->gfx.config.max_shader_engines,
3536 			adev->gfx.config.max_sh_per_se,
3537 			adev->gfx.config.max_cu_per_sh,
3538 			adev->gfx.cu_info.number);
3539 
3540 	adev->accel_working = true;
3541 
3542 	amdgpu_vm_check_compute_bug(adev);
3543 
3544 	/* Initialize the buffer migration limit. */
3545 	if (amdgpu_moverate >= 0)
3546 		max_MBps = amdgpu_moverate;
3547 	else
3548 		max_MBps = 8; /* Allow 8 MB/s. */
3549 	/* Get a log2 for easy divisions. */
3550 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3551 
3552 	amdgpu_fbdev_init(adev);
3553 
3554 	r = amdgpu_pm_sysfs_init(adev);
3555 	if (r) {
3556 		adev->pm_sysfs_en = false;
3557 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
	} else {
		adev->pm_sysfs_en = true;
	}
3560 
3561 	r = amdgpu_ucode_sysfs_init(adev);
3562 	if (r) {
3563 		adev->ucode_sysfs_en = false;
3564 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
	} else {
		adev->ucode_sysfs_en = true;
	}
3567 
3568 	if ((amdgpu_testing & 1)) {
3569 		if (adev->accel_working)
3570 			amdgpu_test_moves(adev);
3571 		else
3572 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3573 	}
3574 	if (amdgpu_benchmarking) {
3575 		if (adev->accel_working)
3576 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3577 		else
3578 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3579 	}
3580 
3581 	/*
	 * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost.
	 * Otherwise the mgpu fan boost feature will be skipped because the
	 * gpu instance count would be too low.
3585 	 */
3586 	amdgpu_register_gpu_instance(adev);
3587 
3588 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3589 	 * explicit gating rather than handling it automatically.
3590 	 */
3591 	if (!adev->gmc.xgmi.pending_reset) {
3592 		r = amdgpu_device_ip_late_init(adev);
3593 		if (r) {
3594 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3595 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3596 			goto release_ras_con;
3597 		}
3598 		/* must succeed. */
3599 		amdgpu_ras_resume(adev);
3600 		queue_delayed_work(system_wq, &adev->delayed_init_work,
3601 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3602 	}
3603 
3604 	if (amdgpu_sriov_vf(adev))
3605 		flush_delayed_work(&adev->delayed_init_work);
3606 
3607 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3608 	if (r)
3609 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3610 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3615 
	/* Keep the stored PCI confspace at hand to restore after a sudden PCI error */
3617 	if (amdgpu_device_cache_pci_state(adev->pdev))
3618 		pci_restore_state(pdev);
3619 
	/*
	 * If we have > 1 VGA cards, then disable the amdgpu VGA resources.
	 * This will fail for cards that aren't VGA class devices, just
	 * ignore it.
	 */
3623 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3624 		vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3625 
3626 	if (amdgpu_device_supports_px(ddev)) {
3627 		px = true;
3628 		vga_switcheroo_register_client(adev->pdev,
3629 					       &amdgpu_switcheroo_ops, px);
3630 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3631 	}
3632 
3633 	if (adev->gmc.xgmi.pending_reset)
3634 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3635 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3636 
3637 	return 0;
3638 
3639 release_ras_con:
3640 	amdgpu_release_ras_context(adev);
3641 
3642 failed:
3643 	amdgpu_vf_error_trans_all(adev);
3644 
3645 failed_unmap:
3646 	iounmap(adev->rmmio);
3647 	adev->rmmio = NULL;
3648 
3649 	return r;
3650 }
3651 
3652 /**
3653  * amdgpu_device_fini - tear down the driver
3654  *
3655  * @adev: amdgpu_device pointer
3656  *
3657  * Tear down the driver info (all asics).
3658  * Called at driver shutdown.
3659  */
3660 void amdgpu_device_fini(struct amdgpu_device *adev)
3661 {
3662 	dev_info(adev->dev, "amdgpu: finishing device.\n");
3663 	flush_delayed_work(&adev->delayed_init_work);
3664 	ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3665 	adev->shutdown = true;
3666 
3667 	kfree(adev->pci_state);
3668 
	/* make sure IB tests are finished before entering exclusive mode
	 * to avoid preemption on the IB tests
	 */
3672 	if (amdgpu_sriov_vf(adev)) {
3673 		amdgpu_virt_request_full_gpu(adev, false);
3674 		amdgpu_virt_fini_data_exchange(adev);
3675 	}
3676 
3677 	/* disable all interrupts */
3678 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
3680 		if (!amdgpu_device_has_dc_support(adev))
3681 			drm_helper_force_disable_all(adev_to_drm(adev));
3682 		else
3683 			drm_atomic_helper_shutdown(adev_to_drm(adev));
3684 	}
3685 	amdgpu_fence_driver_fini(adev);
3686 	if (adev->pm_sysfs_en)
3687 		amdgpu_pm_sysfs_fini(adev);
3688 	amdgpu_fbdev_fini(adev);
3689 	amdgpu_device_ip_fini(adev);
3690 	release_firmware(adev->firmware.gpu_info_fw);
3691 	adev->firmware.gpu_info_fw = NULL;
3692 	adev->accel_working = false;
3693 
3694 	amdgpu_reset_fini(adev);
3695 
3696 	/* free i2c buses */
3697 	if (!amdgpu_device_has_dc_support(adev))
3698 		amdgpu_i2c_fini(adev);
3699 
3700 	if (amdgpu_emu_mode != 1)
3701 		amdgpu_atombios_fini(adev);
3702 
3703 	kfree(adev->bios);
3704 	adev->bios = NULL;
3705 	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
3706 		vga_switcheroo_unregister_client(adev->pdev);
3707 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3708 	}
3709 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3710 		vga_client_register(adev->pdev, NULL, NULL, NULL);
3711 	iounmap(adev->rmmio);
3712 	adev->rmmio = NULL;
3713 	amdgpu_device_doorbell_fini(adev);
3714 
3715 	if (adev->ucode_sysfs_en)
3716 		amdgpu_ucode_sysfs_fini(adev);
3717 
3718 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3719 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3720 		amdgpu_pmu_fini(adev);
3721 	if (adev->mman.discovery_bin)
3722 		amdgpu_discovery_fini(adev);
3723 }
3724 
3725 
3726 /*
3727  * Suspend & resume.
3728  */
3729 /**
3730  * amdgpu_device_suspend - initiate device suspend
3731  *
3732  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
3734  *
3735  * Puts the hw in the suspend state (all asics).
3736  * Returns 0 for success or an error on failure.
3737  * Called at driver suspend.
3738  */
3739 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3740 {
3741 	struct amdgpu_device *adev = drm_to_adev(dev);
3742 
3743 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3744 		return 0;
3745 
3746 	adev->in_suspend = true;
3747 	drm_kms_helper_poll_disable(dev);
3748 
3749 	if (fbcon)
3750 		amdgpu_fbdev_set_suspend(adev, 1);
3751 
3752 	cancel_delayed_work_sync(&adev->delayed_init_work);
3753 
3754 	amdgpu_ras_suspend(adev);
3755 
3756 	amdgpu_device_ip_suspend_phase1(adev);
3757 
3758 	if (!adev->in_s0ix)
3759 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
3760 
3761 	/* evict vram memory */
3762 	amdgpu_bo_evict_vram(adev);
3763 
3764 	amdgpu_fence_driver_suspend(adev);
3765 
3766 	amdgpu_device_ip_suspend_phase2(adev);
3767 	/* evict remaining vram memory
3768 	 * This second call to evict vram is to evict the gart page table
3769 	 * using the CPU.
3770 	 */
3771 	amdgpu_bo_evict_vram(adev);
3772 
3773 	return 0;
3774 }
3775 
3776 /**
3777  * amdgpu_device_resume - initiate device resume
3778  *
3779  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
3781  *
3782  * Bring the hw back to operating state (all asics).
3783  * Returns 0 for success or an error on failure.
3784  * Called at driver resume.
3785  */
3786 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3787 {
3788 	struct amdgpu_device *adev = drm_to_adev(dev);
3789 	int r = 0;
3790 
3791 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3792 		return 0;
3793 
3794 	if (adev->in_s0ix)
3795 		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3796 
3797 	/* post card */
3798 	if (amdgpu_device_need_post(adev)) {
3799 		r = amdgpu_device_asic_init(adev);
3800 		if (r)
3801 			dev_err(adev->dev, "amdgpu asic init failed\n");
3802 	}
3803 
3804 	r = amdgpu_device_ip_resume(adev);
3805 	if (r) {
3806 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3807 		return r;
3808 	}
3809 	amdgpu_fence_driver_resume(adev);
3810 
3811 
3812 	r = amdgpu_device_ip_late_init(adev);
3813 	if (r)
3814 		return r;
3815 
3816 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3817 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3818 
3819 	if (!adev->in_s0ix) {
3820 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
3821 		if (r)
3822 			return r;
3823 	}
3824 
3825 	/* Make sure IB tests flushed */
3826 	flush_delayed_work(&adev->delayed_init_work);
3827 
3828 	if (fbcon)
3829 		amdgpu_fbdev_set_suspend(adev, 0);
3830 
3831 	drm_kms_helper_poll_enable(dev);
3832 
3833 	amdgpu_ras_resume(adev);
3834 
3835 	/*
3836 	 * Most of the connector probing functions try to acquire runtime pm
3837 	 * refs to ensure that the GPU is powered on when connector polling is
3838 	 * performed. Since we're calling this from a runtime PM callback,
3839 	 * trying to acquire rpm refs will cause us to deadlock.
3840 	 *
3841 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3842 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3843 	 */
3844 #ifdef CONFIG_PM
3845 	dev->dev->power.disable_depth++;
3846 #endif
3847 	if (!amdgpu_device_has_dc_support(adev))
3848 		drm_helper_hpd_irq_event(dev);
3849 	else
3850 		drm_kms_helper_hotplug_event(dev);
3851 #ifdef CONFIG_PM
3852 	dev->dev->power.disable_depth--;
3853 #endif
3854 	adev->in_suspend = false;
3855 
3856 	return 0;
3857 }
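
/*
 * Illustrative sketch only (not part of the driver's call graph in this
 * file): a system sleep callback would typically wrap the two helpers
 * above, recovering the drm_device from the struct device driver data.
 * The function name below is hypothetical; the real PM callbacks live
 * in amdgpu_drv.c.
 */
static inline int amdgpu_device_sketch_pm_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon = true so the fbdev console is notified about the suspend */
	return amdgpu_device_suspend(drm_dev, true);
}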
3858 
3859 /**
3860  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3861  *
3862  * @adev: amdgpu_device pointer
3863  *
3864  * The list of all the hardware IPs that make up the asic is walked and
3865  * the check_soft_reset callbacks are run.  check_soft_reset determines
3866  * if the asic is still hung or not.
3867  * Returns true if any of the IPs are still in a hung state, false if not.
3868  */
3869 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3870 {
3871 	int i;
3872 	bool asic_hang = false;
3873 
3874 	if (amdgpu_sriov_vf(adev))
3875 		return true;
3876 
3877 	if (amdgpu_asic_need_full_reset(adev))
3878 		return true;
3879 
3880 	for (i = 0; i < adev->num_ip_blocks; i++) {
3881 		if (!adev->ip_blocks[i].status.valid)
3882 			continue;
3883 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3884 			adev->ip_blocks[i].status.hang =
3885 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3886 		if (adev->ip_blocks[i].status.hang) {
3887 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3888 			asic_hang = true;
3889 		}
3890 	}
3891 	return asic_hang;
3892 }
3893 
3894 /**
3895  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3896  *
3897  * @adev: amdgpu_device pointer
3898  *
3899  * The list of all the hardware IPs that make up the asic is walked and the
3900  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3901  * handles any IP specific hardware or software state changes that are
3902  * necessary for a soft reset to succeed.
3903  * Returns 0 on success, negative error code on failure.
3904  */
3905 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3906 {
3907 	int i, r = 0;
3908 
3909 	for (i = 0; i < adev->num_ip_blocks; i++) {
3910 		if (!adev->ip_blocks[i].status.valid)
3911 			continue;
3912 		if (adev->ip_blocks[i].status.hang &&
3913 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3914 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3915 			if (r)
3916 				return r;
3917 		}
3918 	}
3919 
3920 	return 0;
3921 }
3922 
3923 /**
3924  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3925  *
3926  * @adev: amdgpu_device pointer
3927  *
3928  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3929  * reset is necessary to recover.
3930  * Returns true if a full asic reset is required, false if not.
3931  */
3932 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3933 {
3934 	int i;
3935 
3936 	if (amdgpu_asic_need_full_reset(adev))
3937 		return true;
3938 
3939 	for (i = 0; i < adev->num_ip_blocks; i++) {
3940 		if (!adev->ip_blocks[i].status.valid)
3941 			continue;
3942 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3943 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3944 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3945 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3946 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3947 			if (adev->ip_blocks[i].status.hang) {
				dev_info(adev->dev, "Some blocks need a full reset!\n");
3949 				return true;
3950 			}
3951 		}
3952 	}
3953 	return false;
3954 }
3955 
3956 /**
3957  * amdgpu_device_ip_soft_reset - do a soft reset
3958  *
3959  * @adev: amdgpu_device pointer
3960  *
3961  * The list of all the hardware IPs that make up the asic is walked and the
3962  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3963  * IP specific hardware or software state changes that are necessary to soft
3964  * reset the IP.
3965  * Returns 0 on success, negative error code on failure.
3966  */
3967 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3968 {
3969 	int i, r = 0;
3970 
3971 	for (i = 0; i < adev->num_ip_blocks; i++) {
3972 		if (!adev->ip_blocks[i].status.valid)
3973 			continue;
3974 		if (adev->ip_blocks[i].status.hang &&
3975 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3976 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3977 			if (r)
3978 				return r;
3979 		}
3980 	}
3981 
3982 	return 0;
3983 }
3984 
3985 /**
3986  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3987  *
3988  * @adev: amdgpu_device pointer
3989  *
3990  * The list of all the hardware IPs that make up the asic is walked and the
3991  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3992  * handles any IP specific hardware or software state changes that are
3993  * necessary after the IP has been soft reset.
3994  * Returns 0 on success, negative error code on failure.
3995  */
3996 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3997 {
3998 	int i, r = 0;
3999 
4000 	for (i = 0; i < adev->num_ip_blocks; i++) {
4001 		if (!adev->ip_blocks[i].status.valid)
4002 			continue;
4003 		if (adev->ip_blocks[i].status.hang &&
4004 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4005 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4006 		if (r)
4007 			return r;
4008 	}
4009 
4010 	return 0;
4011 }
4012 
4013 /**
4014  * amdgpu_device_recover_vram - Recover some VRAM contents
4015  *
4016  * @adev: amdgpu_device pointer
4017  *
4018  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4019  * restore things like GPUVM page tables after a GPU reset where
4020  * the contents of VRAM might be lost.
4021  *
4022  * Returns:
4023  * 0 on success, negative error code on failure.
4024  */
4025 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4026 {
4027 	struct dma_fence *fence = NULL, *next = NULL;
4028 	struct amdgpu_bo *shadow;
4029 	long r = 1, tmo;
4030 
4031 	if (amdgpu_sriov_runtime(adev))
4032 		tmo = msecs_to_jiffies(8000);
4033 	else
4034 		tmo = msecs_to_jiffies(100);
4035 
4036 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4037 	mutex_lock(&adev->shadow_list_lock);
4038 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4039 
4040 		/* No need to recover an evicted BO */
4041 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
4042 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
4043 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4044 			continue;
4045 
4046 		r = amdgpu_bo_restore_shadow(shadow, &next);
4047 		if (r)
4048 			break;
4049 
4050 		if (fence) {
4051 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4052 			dma_fence_put(fence);
4053 			fence = next;
4054 			if (tmo == 0) {
4055 				r = -ETIMEDOUT;
4056 				break;
4057 			} else if (tmo < 0) {
4058 				r = tmo;
4059 				break;
4060 			}
4061 		} else {
4062 			fence = next;
4063 		}
4064 	}
4065 	mutex_unlock(&adev->shadow_list_lock);
4066 
4067 	if (fence)
4068 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4069 	dma_fence_put(fence);
4070 
4071 	if (r < 0 || tmo <= 0) {
4072 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4073 		return -EIO;
4074 	}
4075 
4076 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4077 	return 0;
4078 }
4079 
4080 
4081 /**
4082  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4083  *
4084  * @adev: amdgpu_device pointer
4085  * @from_hypervisor: request from hypervisor
4086  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
4089  */
4090 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4091 				     bool from_hypervisor)
4092 {
4093 	int r;
4094 
4095 	if (from_hypervisor)
4096 		r = amdgpu_virt_request_full_gpu(adev, true);
4097 	else
4098 		r = amdgpu_virt_reset_gpu(adev);
4099 	if (r)
4100 		return r;
4101 
4102 	amdgpu_amdkfd_pre_reset(adev);
4103 
4104 	/* Resume IP prior to SMC */
4105 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4106 	if (r)
4107 		goto error;
4108 
4109 	amdgpu_virt_init_data_exchange(adev);
	/* we need to recover the gart prior to running SMC/CP/SDMA resume */
4111 	amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
4112 
4113 	r = amdgpu_device_fw_loading(adev);
4114 	if (r)
		goto error;
4116 
4117 	/* now we are okay to resume SMC/CP/SDMA */
4118 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4119 	if (r)
4120 		goto error;
4121 
4122 	amdgpu_irq_gpu_reset_resume_helper(adev);
4123 	r = amdgpu_ib_ring_tests(adev);
4124 	amdgpu_amdkfd_post_reset(adev);
4125 
4126 error:
4127 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4128 		amdgpu_inc_vram_lost(adev);
4129 		r = amdgpu_device_recover_vram(adev);
4130 	}
4131 	amdgpu_virt_release_full_gpu(adev, true);
4132 
4133 	return r;
4134 }
4135 
4136 /**
 * amdgpu_device_has_job_running - check if there is any job in the pending list
4138  *
4139  * @adev: amdgpu_device pointer
4140  *
 * Check if there is any job in the pending list.
4142  */
4143 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4144 {
4145 	int i;
4146 	struct drm_sched_job *job;
4147 
4148 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4149 		struct amdgpu_ring *ring = adev->rings[i];
4150 
4151 		if (!ring || !ring->sched.thread)
4152 			continue;
4153 
4154 		spin_lock(&ring->sched.job_list_lock);
4155 		job = list_first_entry_or_null(&ring->sched.pending_list,
4156 					       struct drm_sched_job, list);
4157 		spin_unlock(&ring->sched.job_list_lock);
4158 		if (job)
4159 			return true;
4160 	}
4161 	return false;
4162 }
4163 
4164 /**
4165  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4166  *
4167  * @adev: amdgpu_device pointer
4168  *
4169  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4170  * a hung GPU.
4171  */
4172 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4173 {
4174 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
4175 		dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
4176 		return false;
4177 	}
4178 
4179 	if (amdgpu_gpu_recovery == 0)
4180 		goto disabled;
4181 
4182 	if (amdgpu_sriov_vf(adev))
4183 		return true;
4184 
4185 	if (amdgpu_gpu_recovery == -1) {
4186 		switch (adev->asic_type) {
4187 		case CHIP_BONAIRE:
4188 		case CHIP_HAWAII:
4189 		case CHIP_TOPAZ:
4190 		case CHIP_TONGA:
4191 		case CHIP_FIJI:
4192 		case CHIP_POLARIS10:
4193 		case CHIP_POLARIS11:
4194 		case CHIP_POLARIS12:
4195 		case CHIP_VEGAM:
4196 		case CHIP_VEGA20:
4197 		case CHIP_VEGA10:
4198 		case CHIP_VEGA12:
4199 		case CHIP_RAVEN:
4200 		case CHIP_ARCTURUS:
4201 		case CHIP_RENOIR:
4202 		case CHIP_NAVI10:
4203 		case CHIP_NAVI14:
4204 		case CHIP_NAVI12:
4205 		case CHIP_SIENNA_CICHLID:
4206 		case CHIP_NAVY_FLOUNDER:
4207 		case CHIP_DIMGREY_CAVEFISH:
4208 		case CHIP_VANGOGH:
4209 		case CHIP_ALDEBARAN:
4210 			break;
4211 		default:
4212 			goto disabled;
4213 		}
4214 	}
4215 
4216 	return true;
4217 
disabled:
	dev_info(adev->dev, "GPU recovery disabled.\n");
	return false;
4221 }
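
/*
 * Illustrative sketch only: how a hang handler might combine
 * amdgpu_device_should_recover_gpu() above with amdgpu_device_gpu_recover()
 * below.  The helper name is hypothetical; the real callers sit in the
 * job timeout path outside this file.
 */
static inline int amdgpu_device_sketch_handle_hang(struct amdgpu_device *adev,
						   struct amdgpu_job *job)
{
	/* Bail out early when recovery is disabled or no real hang was found */
	if (!amdgpu_device_should_recover_gpu(adev))
		return 0;

	/* Otherwise run the full reset and recovery sequence */
	return amdgpu_device_gpu_recover(adev, job);
}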
4222 
4223 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4224 {
	u32 i;
	int ret = 0;

	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* disable BM */
	pci_clear_master(adev->pdev);

	amdgpu_device_cache_pci_state(adev->pdev);

	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		dev_err(adev->dev, "GPU mode1 reset failed\n");

	amdgpu_device_load_pci_state(adev->pdev);

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
	return ret;
4261 }
4262 
4263 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4264 				 struct amdgpu_reset_context *reset_context)
4265 {
4266 	int i, r = 0;
4267 	struct amdgpu_job *job = NULL;
4268 	bool need_full_reset =
4269 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4270 
4271 	if (reset_context->reset_req_dev == adev)
4272 		job = reset_context->job;
4273 
4274 	/* no need to dump if device is not in good state during probe period */
4275 	if (!adev->gmc.xgmi.pending_reset)
4276 		amdgpu_debugfs_wait_dump(adev);
4277 
4278 	if (amdgpu_sriov_vf(adev)) {
4279 		/* stop the data exchange thread */
4280 		amdgpu_virt_fini_data_exchange(adev);
4281 	}
4282 
4283 	/* block all schedulers and reset given job's ring */
4284 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4285 		struct amdgpu_ring *ring = adev->rings[i];
4286 
4287 		if (!ring || !ring->sched.thread)
4288 			continue;
4289 
4290 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4291 		amdgpu_fence_driver_force_completion(ring);
4292 	}
4293 
	if (job)
4295 		drm_sched_increase_karma(&job->base);
4296 
4297 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4298 	/* If reset handler not implemented, continue; otherwise return */
4299 	if (r == -ENOSYS)
4300 		r = 0;
4301 	else
4302 		return r;
4303 
4304 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4305 	if (!amdgpu_sriov_vf(adev)) {
4306 
4307 		if (!need_full_reset)
4308 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4309 
4310 		if (!need_full_reset) {
4311 			amdgpu_device_ip_pre_soft_reset(adev);
4312 			r = amdgpu_device_ip_soft_reset(adev);
4313 			amdgpu_device_ip_post_soft_reset(adev);
4314 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4315 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4316 				need_full_reset = true;
4317 			}
4318 		}
4319 
4320 		if (need_full_reset)
4321 			r = amdgpu_device_ip_suspend(adev);
4322 		if (need_full_reset)
4323 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4324 		else
4325 			clear_bit(AMDGPU_NEED_FULL_RESET,
4326 				  &reset_context->flags);
4327 	}
4328 
4329 	return r;
4330 }
4331 
4332 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4333 			 struct amdgpu_reset_context *reset_context)
4334 {
4335 	struct amdgpu_device *tmp_adev = NULL;
4336 	bool need_full_reset, skip_hw_reset, vram_lost = false;
4337 	int r = 0;
4338 
4339 	/* Try reset handler method first */
4340 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4341 				    reset_list);
4342 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4343 	/* If reset handler not implemented, continue; otherwise return */
4344 	if (r == -ENOSYS)
4345 		r = 0;
4346 	else
4347 		return r;
4348 
4349 	/* Reset handler not implemented, use the default method */
4350 	need_full_reset =
4351 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4352 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4353 
4354 	/*
4355 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4356 	 * to allow proper links negotiation in FW (within 1 sec)
4357 	 */
4358 	if (!skip_hw_reset && need_full_reset) {
4359 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4360 			/* For XGMI run all resets in parallel to speed up the process */
4361 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4362 				tmp_adev->gmc.xgmi.pending_reset = false;
4363 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4364 					r = -EALREADY;
4365 			} else
4366 				r = amdgpu_asic_reset(tmp_adev);
4367 
4368 			if (r) {
4369 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4370 					 r, adev_to_drm(tmp_adev)->unique);
4371 				break;
4372 			}
4373 		}
4374 
4375 		/* For XGMI wait for all resets to complete before proceed */
4376 		if (!r) {
4377 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4378 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4379 					flush_work(&tmp_adev->xgmi_reset_work);
4380 					r = tmp_adev->asic_reset_res;
4381 					if (r)
4382 						break;
4383 				}
4384 			}
4385 		}
4386 	}
4387 
4388 	if (!r && amdgpu_ras_intr_triggered()) {
4389 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4390 			if (tmp_adev->mmhub.ras_funcs &&
4391 			    tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
4392 				tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
4393 		}
4394 
4395 		amdgpu_ras_intr_cleared();
4396 	}
4397 
4398 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4399 		if (need_full_reset) {
4400 			/* post card */
4401 			r = amdgpu_device_asic_init(tmp_adev);
4402 			if (r) {
4403 				dev_warn(tmp_adev->dev, "asic atom init failed!");
4404 			} else {
4405 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4406 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4407 				if (r)
4408 					goto out;
4409 
4410 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4411 				if (vram_lost) {
4412 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4413 					amdgpu_inc_vram_lost(tmp_adev);
4414 				}
4415 
4416 				r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
4417 				if (r)
4418 					goto out;
4419 
4420 				r = amdgpu_device_fw_loading(tmp_adev);
4421 				if (r)
4422 					return r;
4423 
4424 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4425 				if (r)
4426 					goto out;
4427 
4428 				if (vram_lost)
4429 					amdgpu_device_fill_reset_magic(tmp_adev);
4430 
				/*
				 * Add this ASIC back as tracked now that the
				 * reset has completed successfully.
				 */
4435 				amdgpu_register_gpu_instance(tmp_adev);
4436 
4437 				if (!reset_context->hive &&
4438 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4439 					amdgpu_xgmi_add_device(tmp_adev);
4440 
4441 				r = amdgpu_device_ip_late_init(tmp_adev);
4442 				if (r)
4443 					goto out;
4444 
4445 				amdgpu_fbdev_set_suspend(tmp_adev, 0);
4446 
				/*
				 * The GPU enters a bad state once the number of
				 * faulty pages reported by ECC reaches the
				 * threshold; RAS recovery is then scheduled.
				 * Check here and abort the recovery if the bad
				 * page threshold has indeed been exceeded, and
				 * remind the user to retire this GPU or set a
				 * bigger bad_page_threshold value so this is
				 * handled the next time the driver is probed.
				 */
4457 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4458 					/* must succeed. */
4459 					amdgpu_ras_resume(tmp_adev);
4460 				} else {
4461 					r = -EINVAL;
4462 					goto out;
4463 				}
4464 
4465 				/* Update PSP FW topology after reset */
4466 				if (reset_context->hive &&
4467 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4468 					r = amdgpu_xgmi_update_topology(
4469 						reset_context->hive, tmp_adev);
4470 			}
4471 		}
4472 
4473 out:
4474 		if (!r) {
4475 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4476 			r = amdgpu_ib_ring_tests(tmp_adev);
4477 			if (r) {
4478 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4479 				r = amdgpu_device_ip_suspend(tmp_adev);
4480 				need_full_reset = true;
4481 				r = -EAGAIN;
4482 				goto end;
4483 			}
4484 		}
4485 
4486 		if (!r)
4487 			r = amdgpu_device_recover_vram(tmp_adev);
4488 		else
4489 			tmp_adev->asic_reset_res = r;
4490 	}
4491 
4492 end:
4493 	if (need_full_reset)
4494 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4495 	else
4496 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4497 	return r;
4498 }
4499 
4500 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4501 				struct amdgpu_hive_info *hive)
4502 {
4503 	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4504 		return false;
4505 
4506 	if (hive) {
4507 		down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4508 	} else {
4509 		down_write(&adev->reset_sem);
4510 	}
4511 
4512 	switch (amdgpu_asic_reset_method(adev)) {
4513 	case AMD_RESET_METHOD_MODE1:
4514 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4515 		break;
4516 	case AMD_RESET_METHOD_MODE2:
4517 		adev->mp1_state = PP_MP1_STATE_RESET;
4518 		break;
4519 	default:
4520 		adev->mp1_state = PP_MP1_STATE_NONE;
4521 		break;
4522 	}
4523 
4524 	return true;
4525 }
4526 
4527 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4528 {
4529 	amdgpu_vf_error_trans_all(adev);
4530 	adev->mp1_state = PP_MP1_STATE_NONE;
4531 	atomic_set(&adev->in_gpu_reset, 0);
4532 	up_write(&adev->reset_sem);
4533 }
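
/*
 * Illustrative sketch only: the two helpers above are meant to bracket a
 * reset critical section.  The helper name is hypothetical and the actual
 * reset work is elided.
 */
static inline void amdgpu_device_sketch_locked_reset(struct amdgpu_device *adev)
{
	/* Returns false if another reset already owns this device */
	if (!amdgpu_device_lock_adev(adev, NULL))
		return;

	/* ... reset work runs here while holding adev->reset_sem ... */

	amdgpu_device_unlock_adev(adev);
}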
4534 
4535 /*
 * To lock a list of amdgpu devices in a hive safely.  If it is not a hive
 * with multiple nodes, this behaves the same as amdgpu_device_lock_adev.
 *
 * Unlocking does not require any rollback.
4540  */
4541 static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4542 {
4543 	struct amdgpu_device *tmp_adev = NULL;
4544 
4545 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4546 		if (!hive) {
4547 			dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4548 			return -ENODEV;
4549 		}
4550 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4551 			if (!amdgpu_device_lock_adev(tmp_adev, hive))
4552 				goto roll_back;
4553 		}
4554 	} else if (!amdgpu_device_lock_adev(adev, hive))
4555 		return -EAGAIN;
4556 
4557 	return 0;
4558 roll_back:
4559 	if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4560 		/*
		 * If the locking iteration broke off in the middle of a hive,
		 * there may be a race issue, or a hive device may have locked
		 * up independently.  We may or may not be in trouble, so roll
		 * back the locks already taken and print a warning.
4566 		 */
4567 		dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4568 		list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4569 			amdgpu_device_unlock_adev(tmp_adev);
4570 		}
4571 	}
4572 	return -EAGAIN;
4573 }
4574 
4575 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4576 {
4577 	struct pci_dev *p = NULL;
4578 
4579 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4580 			adev->pdev->bus->number, 1);
4581 	if (p) {
4582 		pm_runtime_enable(&(p->dev));
4583 		pm_runtime_resume(&(p->dev));
4584 	}
4585 }
4586 
4587 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4588 {
4589 	enum amd_reset_method reset_method;
4590 	struct pci_dev *p = NULL;
4591 	u64 expires;
4592 
4593 	/*
	 * For now, only BACO and mode1 reset are confirmed to
	 * suffer from the audio issue if the audio device is not
	 * properly suspended beforehand.
4596 	 */
4597 	reset_method = amdgpu_asic_reset_method(adev);
4598 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
4599 	     (reset_method != AMD_RESET_METHOD_MODE1))
4600 		return -EINVAL;
4601 
4602 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4603 			adev->pdev->bus->number, 1);
4604 	if (!p)
4605 		return -ENODEV;
4606 
4607 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
4608 	if (!expires)
4609 		/*
		 * If we cannot get the audio device autosuspend delay,
		 * use a fixed 4S interval.  Since 3S is the audio
		 * controller's default autosuspend delay setting, 4S
		 * is guaranteed to cover it.
4614 		 */
4615 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4616 
4617 	while (!pm_runtime_status_suspended(&(p->dev))) {
4618 		if (!pm_runtime_suspend(&(p->dev)))
4619 			break;
4620 
4621 		if (expires < ktime_get_mono_fast_ns()) {
4622 			dev_warn(adev->dev, "failed to suspend display audio\n");
4623 			/* TODO: abort the succeeding gpu reset? */
4624 			return -ETIMEDOUT;
4625 		}
4626 	}
4627 
4628 	pm_runtime_disable(&(p->dev));
4629 
4630 	return 0;
4631 }
4632 
4633 void amdgpu_device_recheck_guilty_jobs(
4634 	struct amdgpu_device *adev, struct list_head *device_list_handle,
4635 	struct amdgpu_reset_context *reset_context)
4636 {
4637 	int i, r = 0;
4638 
4639 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4640 		struct amdgpu_ring *ring = adev->rings[i];
4641 		int ret = 0;
4642 		struct drm_sched_job *s_job;
4643 
4644 		if (!ring || !ring->sched.thread)
4645 			continue;
4646 
4647 		s_job = list_first_entry_or_null(&ring->sched.pending_list,
4648 				struct drm_sched_job, list);
4649 		if (s_job == NULL)
4650 			continue;
4651 
		/* clear the job's guilty flag and rely on the following step to decide the real one */
4653 		drm_sched_reset_karma(s_job);
4654 		drm_sched_resubmit_jobs_ext(&ring->sched, 1);
4655 
4656 		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
4657 		if (ret == 0) { /* timeout */
4658 			DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
4659 						ring->sched.name, s_job->id);
4660 
4661 			/* set guilty */
4662 			drm_sched_increase_karma(s_job);
4663 retry:
4664 			/* do hw reset */
4665 			if (amdgpu_sriov_vf(adev)) {
4666 				amdgpu_virt_fini_data_exchange(adev);
4667 				r = amdgpu_device_reset_sriov(adev, false);
4668 				if (r)
4669 					adev->asic_reset_res = r;
4670 			} else {
4671 				clear_bit(AMDGPU_SKIP_HW_RESET,
4672 					  &reset_context->flags);
4673 				r = amdgpu_do_asic_reset(device_list_handle,
4674 							 reset_context);
4675 				if (r && r == -EAGAIN)
4676 					goto retry;
4677 			}
4678 
			/*
			 * increment the reset counter so that the following
			 * resubmitted jobs can flush their VMIDs
			 */
4683 			atomic_inc(&adev->gpu_reset_counter);
4684 			continue;
4685 		}
4686 
4687 		/* got the hw fence, signal finished fence */
4688 		atomic_dec(ring->sched.score);
4689 		dma_fence_get(&s_job->s_fence->finished);
4690 		dma_fence_signal(&s_job->s_fence->finished);
4691 		dma_fence_put(&s_job->s_fence->finished);
4692 
4693 		/* remove node from list and free the job */
4694 		spin_lock(&ring->sched.job_list_lock);
4695 		list_del_init(&s_job->list);
4696 		spin_unlock(&ring->sched.job_list_lock);
4697 		ring->sched.ops->free_job(s_job);
4698 	}
4699 }
4700 
4701 /**
4702  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4703  *
4704  * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempts a soft reset or a full reset and reinitializes the ASIC.
4709  * Returns 0 for success or an error on failure.
4710  */
4712 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4713 			      struct amdgpu_job *job)
4714 {
4715 	struct list_head device_list, *device_list_handle =  NULL;
4716 	bool job_signaled = false;
4717 	struct amdgpu_hive_info *hive = NULL;
4718 	struct amdgpu_device *tmp_adev = NULL;
4719 	int i, r = 0;
4720 	bool need_emergency_restart = false;
4721 	bool audio_suspended = false;
4722 	int tmp_vram_lost_counter;
4723 	struct amdgpu_reset_context reset_context;
4724 
4725 	memset(&reset_context, 0, sizeof(reset_context));
4726 
4727 	/*
4728 	 * Special case: RAS triggered and full reset isn't supported
4729 	 */
4730 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4731 
4732 	/*
4733 	 * Flush RAM to disk so that after reboot
4734 	 * the user can read log and see why the system rebooted.
4735 	 */
4736 	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
4737 		DRM_WARN("Emergency reboot.");
4738 
4739 		ksys_sync_helper();
4740 		emergency_restart();
4741 	}
4742 
4743 	dev_info(adev->dev, "GPU %s begin!\n",
4744 		need_emergency_restart ? "jobs stop":"reset");
4745 
4746 	/*
	 * Here we trylock to avoid a chain of resets executing from either
	 * jobs triggered on different adevs in an XGMI hive or jobs on
	 * different schedulers of the same device while this TO handler is
	 * running.  We always reset all schedulers for a device and all
	 * devices in an XGMI hive, so that should take care of them too.
4752 	 */
4753 	hive = amdgpu_get_xgmi_hive(adev);
4754 	if (hive) {
4755 		if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4756 			DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4757 				job ? job->base.id : -1, hive->hive_id);
4758 			amdgpu_put_xgmi_hive(hive);
4759 			if (job)
4760 				drm_sched_increase_karma(&job->base);
4761 			return 0;
4762 		}
4763 		mutex_lock(&hive->hive_lock);
4764 	}
4765 
4766 	reset_context.method = AMD_RESET_METHOD_NONE;
4767 	reset_context.reset_req_dev = adev;
4768 	reset_context.job = job;
4769 	reset_context.hive = hive;
4770 	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
4771 
4772 	/*
	 * Lock the device before we try to operate on the linked list.
	 * If we didn't get the device lock, don't touch the linked list
	 * since others may be iterating over it.
4776 	 */
4777 	r = amdgpu_device_lock_hive_adev(adev, hive);
4778 	if (r) {
4779 		dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4780 					job ? job->base.id : -1);
4781 
		/* even though we skipped this reset, we still need to mark the job as guilty */
4783 		if (job)
4784 			drm_sched_increase_karma(&job->base);
4785 		goto skip_recovery;
4786 	}
4787 
4788 	/*
4789 	 * Build list of devices to reset.
4790 	 * In case we are in XGMI hive mode, resort the device list
4791 	 * to put adev in the 1st position.
4792 	 */
4793 	INIT_LIST_HEAD(&device_list);
4794 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4795 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
4796 			list_add_tail(&tmp_adev->reset_list, &device_list);
4797 		if (!list_is_first(&adev->reset_list, &device_list))
4798 			list_rotate_to_front(&adev->reset_list, &device_list);
4799 		device_list_handle = &device_list;
4800 	} else {
4801 		list_add_tail(&adev->reset_list, &device_list);
4802 		device_list_handle = &device_list;
4803 	}
4804 
4805 	/* block all schedulers and reset given job's ring */
4806 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4807 		/*
		 * Try to put the audio codec into suspend state
		 * before the gpu reset starts.
		 *
		 * The power domain of the graphics device is shared
		 * with the AZ power domain.  Without this, we may
		 * change the audio hardware behind the audio driver's
		 * back, which triggers audio codec errors.
4816 		 */
4817 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
4818 			audio_suspended = true;
4819 
4820 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
4821 
4822 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4823 
4824 		if (!amdgpu_sriov_vf(tmp_adev))
4825 			amdgpu_amdkfd_pre_reset(tmp_adev);
4826 
4827 		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset has completed.
4830 		 */
4831 		amdgpu_unregister_gpu_instance(tmp_adev);
4832 
4833 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
4834 
4835 		/* disable ras on ALL IPs */
4836 		if (!need_emergency_restart &&
4837 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4838 			amdgpu_ras_suspend(tmp_adev);
4839 
4840 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4841 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4842 
4843 			if (!ring || !ring->sched.thread)
4844 				continue;
4845 
4846 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4847 
4848 			if (need_emergency_restart)
4849 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4850 		}
4851 		atomic_inc(&tmp_adev->gpu_reset_counter);
4852 	}
4853 
4854 	if (need_emergency_restart)
4855 		goto skip_sched_resume;
4856 
4857 	/*
4858 	 * Must check guilty signal here since after this point all old
4859 	 * HW fences are force signaled.
4860 	 *
4861 	 * job->base holds a reference to parent fence
4862 	 */
4863 	if (job && job->base.s_fence->parent &&
4864 	    dma_fence_is_signaled(job->base.s_fence->parent)) {
4865 		job_signaled = true;
4866 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4867 		goto skip_hw_reset;
4868 	}
4869 
4870 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4871 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4872 		r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
		/* TODO: Should we stop? */
4874 		if (r) {
4875 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4876 				  r, adev_to_drm(tmp_adev)->unique);
4877 			tmp_adev->asic_reset_res = r;
4878 		}
4879 	}
4880 
4881 	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
4882 	/* Actual ASIC resets if needed.*/
4883 	/* TODO Implement XGMI hive reset logic for SRIOV */
4884 	if (amdgpu_sriov_vf(adev)) {
4885 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4886 		if (r)
4887 			adev->asic_reset_res = r;
4888 	} else {
4889 		r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
4890 		if (r && r == -EAGAIN)
4891 			goto retry;
4892 	}
4893 
4894 skip_hw_reset:
4895 
4896 	/* Post ASIC reset for all devs .*/
4897 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4898 
4899 		/*
		 * Sometimes a later bad compute job can block a good gfx job,
		 * as the gfx and compute rings share internal GC hardware.
		 * We add an extra guilty-job recheck step to find the real
		 * guilty job: it synchronously resubmits and waits for the
		 * first job to be signaled.  If that times out, we identify
		 * it as the real guilty job.
4905 		 */
4906 		if (amdgpu_gpu_recovery == 2 &&
4907 			!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
4908 			amdgpu_device_recheck_guilty_jobs(
4909 				tmp_adev, device_list_handle, &reset_context);
4910 
4911 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4912 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4913 
4914 			if (!ring || !ring->sched.thread)
4915 				continue;
4916 
			/* No point in resubmitting jobs if we didn't do a HW reset */
4918 			if (!tmp_adev->asic_reset_res && !job_signaled)
4919 				drm_sched_resubmit_jobs(&ring->sched);
4920 
4921 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4922 		}
4923 
4924 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4925 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
4926 		}
4927 
4928 		tmp_adev->asic_reset_res = 0;
4929 
4930 		if (r) {
4931 			/* bad news, how to tell it to userspace ? */
4932 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4933 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4934 		} else {
4935 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4936 		}
4937 	}
4938 
4939 skip_sched_resume:
4940 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4941 		/* unlock kfd: SRIOV would do it separately */
4942 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
4944 
		/* kfd_post_reset will do nothing if the kfd device is not
		 * initialized, so bring up kfd here if it wasn't initialized before
4947 		 */
		if (!tmp_adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(tmp_adev);
4950 
4951 		if (audio_suspended)
4952 			amdgpu_device_resume_display_audio(tmp_adev);
4953 		amdgpu_device_unlock_adev(tmp_adev);
4954 	}
4955 
4956 skip_recovery:
4957 	if (hive) {
4958 		atomic_set(&hive->in_reset, 0);
4959 		mutex_unlock(&hive->hive_lock);
4960 		amdgpu_put_xgmi_hive(hive);
4961 	}
4962 
4963 	if (r && r != -EAGAIN)
4964 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4965 	return r;
4966 }
4967 
4968 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4970  *
4971  * @adev: amdgpu_device pointer
4972  *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
4974  * and lanes) of the slot the device is in. Handles APUs and
4975  * virtualized environments where PCIE config space may not be available.
4976  */
4977 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4978 {
4979 	struct pci_dev *pdev;
4980 	enum pci_bus_speed speed_cap, platform_speed_cap;
4981 	enum pcie_link_width platform_link_width;
4982 
4983 	if (amdgpu_pcie_gen_cap)
4984 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4985 
4986 	if (amdgpu_pcie_lane_cap)
4987 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4988 
4989 	/* covers APUs as well */
4990 	if (pci_is_root_bus(adev->pdev->bus)) {
4991 		if (adev->pm.pcie_gen_mask == 0)
4992 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4993 		if (adev->pm.pcie_mlw_mask == 0)
4994 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4995 		return;
4996 	}
4997 
4998 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4999 		return;
5000 
5001 	pcie_bandwidth_available(adev->pdev, NULL,
5002 				 &platform_speed_cap, &platform_link_width);
5003 
5004 	if (adev->pm.pcie_gen_mask == 0) {
5005 		/* asic caps */
5006 		pdev = adev->pdev;
5007 		speed_cap = pcie_get_speed_cap(pdev);
5008 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5009 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5010 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5011 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5012 		} else {
5013 			if (speed_cap == PCIE_SPEED_32_0GT)
5014 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5015 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5016 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5017 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5018 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5019 			else if (speed_cap == PCIE_SPEED_16_0GT)
5020 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5021 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5022 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5023 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5024 			else if (speed_cap == PCIE_SPEED_8_0GT)
5025 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5026 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5027 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5028 			else if (speed_cap == PCIE_SPEED_5_0GT)
5029 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5030 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5031 			else
5032 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5033 		}
5034 		/* platform caps */
5035 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5036 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5037 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5038 		} else {
5039 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5040 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5041 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5042 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5043 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5044 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5045 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5046 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5047 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5048 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5049 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5050 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5051 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5052 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5053 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5054 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5055 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5056 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5057 			else
5058 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5060 		}
5061 	}
5062 	if (adev->pm.pcie_mlw_mask == 0) {
5063 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5064 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5065 		} else {
5066 			switch (platform_link_width) {
5067 			case PCIE_LNK_X32:
5068 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5069 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5070 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5071 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5072 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5073 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5074 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5075 				break;
5076 			case PCIE_LNK_X16:
5077 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5078 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5079 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5080 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5081 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5082 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5083 				break;
5084 			case PCIE_LNK_X12:
5085 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5086 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5087 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5088 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5089 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5090 				break;
5091 			case PCIE_LNK_X8:
5092 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5093 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5094 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5095 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5096 				break;
5097 			case PCIE_LNK_X4:
5098 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5099 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5100 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5101 				break;
5102 			case PCIE_LNK_X2:
5103 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5104 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5105 				break;
5106 			case PCIE_LNK_X1:
5107 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5108 				break;
5109 			default:
5110 				break;
5111 			}
5112 		}
5113 	}
5114 }
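
/*
 * Illustrative sketch only: the masks filled in above can be tested with
 * the CAIL_* flags from amd_pcie.h.  The helper name is hypothetical and
 * gen3 is picked purely as an example.
 */
static inline bool amdgpu_device_sketch_link_supports_gen3(struct amdgpu_device *adev)
{
	/* Both the ASIC and the platform sides must advertise gen3 */
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}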
5115 
5116 int amdgpu_device_baco_enter(struct drm_device *dev)
5117 {
5118 	struct amdgpu_device *adev = drm_to_adev(dev);
5119 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5120 
5121 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5122 		return -ENOTSUPP;
5123 
5124 	if (ras && adev->ras_enabled &&
5125 	    adev->nbio.funcs->enable_doorbell_interrupt)
5126 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5127 
5128 	return amdgpu_dpm_baco_enter(adev);
5129 }
5130 
5131 int amdgpu_device_baco_exit(struct drm_device *dev)
5132 {
5133 	struct amdgpu_device *adev = drm_to_adev(dev);
5134 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5135 	int ret = 0;
5136 
5137 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5138 		return -ENOTSUPP;
5139 
5140 	ret = amdgpu_dpm_baco_exit(adev);
5141 	if (ret)
5142 		return ret;
5143 
5144 	if (ras && adev->ras_enabled &&
5145 	    adev->nbio.funcs->enable_doorbell_interrupt)
5146 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5147 
5148 	return 0;
5149 }
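
/*
 * Illustrative sketch only: BACO entry and exit are expected to be used
 * as a pair, e.g. from a runtime power management style path.  The helper
 * name is hypothetical and the time spent in BACO is elided.
 */
static inline int amdgpu_device_sketch_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... the device sits in BACO (bus alive, chip off) here ... */

	return amdgpu_device_baco_exit(dev);
}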
5150 
5151 static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
5152 {
5153 	int i;
5154 
5155 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5156 		struct amdgpu_ring *ring = adev->rings[i];
5157 
5158 		if (!ring || !ring->sched.thread)
5159 			continue;
5160 
5161 		cancel_delayed_work_sync(&ring->sched.work_tdr);
5162 	}
5163 }
5164 
5165 /**
5166  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5167  * @pdev: PCI device struct
5168  * @state: PCI channel state
5169  *
5170  * Description: Called when a PCI error is detected.
5171  *
5172  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5173  */
5174 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5175 {
5176 	struct drm_device *dev = pci_get_drvdata(pdev);
5177 	struct amdgpu_device *adev = drm_to_adev(dev);
5178 	int i;
5179 
5180 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5181 
5182 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
5183 		DRM_WARN("No support for XGMI hive yet...");
5184 		return PCI_ERS_RESULT_DISCONNECT;
5185 	}
5186 
5187 	switch (state) {
5188 	case pci_channel_io_normal:
5189 		return PCI_ERS_RESULT_CAN_RECOVER;
5190 	/* Fatal error, prepare for slot reset */
5191 	case pci_channel_io_frozen:
5192 		/*
		 * Cancel and wait for all TDRs in progress if we fail to
		 * set adev->in_gpu_reset in amdgpu_device_lock_adev.
5195 		 *
5196 		 * Locking adev->reset_sem will prevent any external access
5197 		 * to GPU during PCI error recovery
5198 		 */
5199 		while (!amdgpu_device_lock_adev(adev, NULL))
5200 			amdgpu_cancel_all_tdr(adev);
5201 
5202 		/*
5203 		 * Block any work scheduling as we do for regular GPU reset
5204 		 * for the duration of the recovery
5205 		 */
5206 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5207 			struct amdgpu_ring *ring = adev->rings[i];
5208 
5209 			if (!ring || !ring->sched.thread)
5210 				continue;
5211 
5212 			drm_sched_stop(&ring->sched, NULL);
5213 		}
5214 		atomic_inc(&adev->gpu_reset_counter);
5215 		return PCI_ERS_RESULT_NEED_RESET;
5216 	case pci_channel_io_perm_failure:
5217 		/* Permanent error, prepare for device removal */
5218 		return PCI_ERS_RESULT_DISCONNECT;
5219 	}
5220 
5221 	return PCI_ERS_RESULT_NEED_RESET;
5222 }
5223 
5224 /**
5225  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5226  * @pdev: pointer to PCI device
5227  */
5228 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5229 {
5231 	DRM_INFO("PCI error: mmio enabled callback!!\n");
5232 
5233 	/* TODO - dump whatever for debugging purposes */
5234 
	/* This is called only if amdgpu_pci_error_detected returns
5236 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5237 	 * works, no need to reset slot.
5238 	 */
5239 
5240 	return PCI_ERS_RESULT_RECOVERED;
5241 }
5242 
5243 /**
5244  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5245  * @pdev: PCI device struct
5246  *
5247  * Description: This routine is called by the pci error recovery
5248  * code after the PCI slot has been reset, just before we
5249  * should resume normal operations.
5250  */
5251 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5252 {
5253 	struct drm_device *dev = pci_get_drvdata(pdev);
5254 	struct amdgpu_device *adev = drm_to_adev(dev);
5255 	int r, i;
5256 	struct amdgpu_reset_context reset_context;
5257 	u32 memsize;
5258 	struct list_head device_list;
5259 
5260 	DRM_INFO("PCI error: slot reset callback!!\n");
5261 
5262 	memset(&reset_context, 0, sizeof(reset_context));
5263 
5264 	INIT_LIST_HEAD(&device_list);
5265 	list_add_tail(&adev->reset_list, &device_list);
5266 
5267 	/* wait for asic to come out of reset */
5268 	msleep(500);
5269 
5270 	/* Restore PCI confspace */
5271 	amdgpu_device_load_pci_state(pdev);
5272 
	/* confirm ASIC came out of reset */
5274 	for (i = 0; i < adev->usec_timeout; i++) {
5275 		memsize = amdgpu_asic_get_config_memsize(adev);
5276 
5277 		if (memsize != 0xffffffff)
5278 			break;
5279 		udelay(1);
5280 	}
5281 	if (memsize == 0xffffffff) {
5282 		r = -ETIME;
5283 		goto out;
5284 	}
5285 
5286 	reset_context.method = AMD_RESET_METHOD_NONE;
5287 	reset_context.reset_req_dev = adev;
5288 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5289 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5290 
5291 	adev->in_pci_err_recovery = true;
5292 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5293 	adev->in_pci_err_recovery = false;
5294 	if (r)
5295 		goto out;
5296 
5297 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
5298 
5299 out:
5300 	if (!r) {
5301 		if (amdgpu_device_cache_pci_state(adev->pdev))
5302 			pci_restore_state(adev->pdev);
5303 
5304 		DRM_INFO("PCIe error recovery succeeded\n");
5305 	} else {
5306 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
5307 		amdgpu_device_unlock_adev(adev);
5308 	}
5309 
5310 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5311 }
5312 
5313 /**
5314  * amdgpu_pci_resume() - resume normal ops after PCI reset
5315  * @pdev: pointer to PCI device
5316  *
5317  * Called when the error recovery driver tells us that its
5318  * OK to resume normal operation.
5319  */
5320 void amdgpu_pci_resume(struct pci_dev *pdev)
5321 {
5322 	struct drm_device *dev = pci_get_drvdata(pdev);
5323 	struct amdgpu_device *adev = drm_to_adev(dev);
5324 	int i;
5325 
5326 
5327 	DRM_INFO("PCI error: resume callback!!\n");
5328 
5329 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5330 		struct amdgpu_ring *ring = adev->rings[i];
5331 
5332 		if (!ring || !ring->sched.thread)
5333 			continue;
5334 
5335 
5336 		drm_sched_resubmit_jobs(&ring->sched);
5337 		drm_sched_start(&ring->sched, true);
5338 	}
5339 
5340 	amdgpu_device_unlock_adev(adev);
5341 }
5342 
5343 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5344 {
5345 	struct drm_device *dev = pci_get_drvdata(pdev);
5346 	struct amdgpu_device *adev = drm_to_adev(dev);
5347 	int r;
5348 
5349 	r = pci_save_state(pdev);
5350 	if (!r) {
5351 		kfree(adev->pci_state);
5352 
5353 		adev->pci_state = pci_store_saved_state(pdev);
5354 
5355 		if (!adev->pci_state) {
5356 			DRM_ERROR("Failed to store PCI saved state");
5357 			return false;
5358 		}
5359 	} else {
5360 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
5361 		return false;
5362 	}
5363 
5364 	return true;
5365 }
5366 
5367 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5368 {
5369 	struct drm_device *dev = pci_get_drvdata(pdev);
5370 	struct amdgpu_device *adev = drm_to_adev(dev);
5371 	int r;
5372 
5373 	if (!adev->pci_state)
5374 		return false;
5375 
5376 	r = pci_load_saved_state(pdev, adev->pci_state);
5377 
5378 	if (!r) {
5379 		pci_restore_state(pdev);
5380 	} else {
5381 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
5382 		return false;
5383 	}
5384 
5385 	return true;
5386 }
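
/*
 * Illustrative sketch only: the two helpers above form a save/restore
 * pair around an ASIC reset, similar to what amdgpu_device_mode1_reset()
 * does.  The helper name is hypothetical and the reset itself is elided.
 */
static inline void amdgpu_device_sketch_pci_state_cycle(struct amdgpu_device *adev)
{
	/* Snapshot config space before the reset clobbers it */
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;

	/* ... ASIC reset happens here ... */

	/* Put the saved config space back once the ASIC is out of reset */
	amdgpu_device_load_pci_state(adev->pdev);
}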
5387 
5388 
5389