1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 #include "amdgpu_fru_eeprom.h"
68 
69 #include <linux/suspend.h>
70 #include <drm/task_barrier.h>
71 #include <linux/pm_runtime.h>
72 
73 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
77 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
78 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
82 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
83 MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin");
85 
86 #define AMDGPU_RESUME_MS		2000
87 
88 const char *amdgpu_asic_name[] = {
89 	"TAHITI",
90 	"PITCAIRN",
91 	"VERDE",
92 	"OLAND",
93 	"HAINAN",
94 	"BONAIRE",
95 	"KAVERI",
96 	"KABINI",
97 	"HAWAII",
98 	"MULLINS",
99 	"TOPAZ",
100 	"TONGA",
101 	"FIJI",
102 	"CARRIZO",
103 	"STONEY",
104 	"POLARIS10",
105 	"POLARIS11",
106 	"POLARIS12",
107 	"VEGAM",
108 	"VEGA10",
109 	"VEGA12",
110 	"VEGA20",
111 	"RAVEN",
112 	"ARCTURUS",
113 	"RENOIR",
114 	"NAVI10",
115 	"NAVI14",
116 	"NAVI12",
117 	"SIENNA_CICHLID",
118 	"NAVY_FLOUNDER",
119 	"LAST",
120 };
121 
122 /**
123  * DOC: pcie_replay_count
124  *
125  * The amdgpu driver provides a sysfs API for reporting the total number
126  * of PCIe replays (NAKs).
127  * The file pcie_replay_count is used for this and returns the total
128  * number of replays as the sum of the NAKs generated and NAKs received.
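 * Reading it is a normal sysfs read; for example (the exact sysfs path may
 * vary from system to system): cat /sys/class/drm/card0/device/pcie_replay_count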
129  */
130 
131 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
132 		struct device_attribute *attr, char *buf)
133 {
134 	struct drm_device *ddev = dev_get_drvdata(dev);
135 	struct amdgpu_device *adev = drm_to_adev(ddev);
136 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
137 
138 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
139 }
140 
141 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
142 		amdgpu_device_get_pcie_replay_count, NULL);
143 
144 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
145 
146 /**
147  * DOC: product_name
148  *
149  * The amdgpu driver provides a sysfs API for reporting the product name
150  * for the device.
151  * The file product_name is used for this and returns the product name
152  * as returned from the FRU.
153  * NOTE: This is only available for certain server cards.
154  */
155 
156 static ssize_t amdgpu_device_get_product_name(struct device *dev,
157 		struct device_attribute *attr, char *buf)
158 {
159 	struct drm_device *ddev = dev_get_drvdata(dev);
160 	struct amdgpu_device *adev = drm_to_adev(ddev);
161 
162 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
163 }
164 
165 static DEVICE_ATTR(product_name, S_IRUGO,
166 		amdgpu_device_get_product_name, NULL);
167 
168 /**
169  * DOC: product_number
170  *
171  * The amdgpu driver provides a sysfs API for reporting the part number
172  * for the device.
173  * The file product_number is used for this and returns the part number
174  * as returned from the FRU.
175  * NOTE: This is only available for certain server cards.
176  */
177 
178 static ssize_t amdgpu_device_get_product_number(struct device *dev,
179 		struct device_attribute *attr, char *buf)
180 {
181 	struct drm_device *ddev = dev_get_drvdata(dev);
182 	struct amdgpu_device *adev = drm_to_adev(ddev);
183 
184 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
185 }
186 
187 static DEVICE_ATTR(product_number, S_IRUGO,
188 		amdgpu_device_get_product_number, NULL);
189 
190 /**
191  * DOC: serial_number
192  *
193  * The amdgpu driver provides a sysfs API for reporting the serial number
194  * for the device.
195  * The file serial_number is used for this and returns the serial number
196  * as returned from the FRU.
197  * NOTE: This is only available for certain server cards.
198  */
199 
200 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
201 		struct device_attribute *attr, char *buf)
202 {
203 	struct drm_device *ddev = dev_get_drvdata(dev);
204 	struct amdgpu_device *adev = drm_to_adev(ddev);
205 
206 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
207 }
208 
209 static DEVICE_ATTR(serial_number, S_IRUGO,
210 		amdgpu_device_get_serial_number, NULL);
211 
212 /**
213  * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
214  *
215  * @dev: drm_device pointer
216  *
217  * Returns true if the device is a dGPU with HG/PX power control,
218  * otherwise return false.
219  */
220 bool amdgpu_device_supports_boco(struct drm_device *dev)
221 {
222 	struct amdgpu_device *adev = drm_to_adev(dev);
223 
224 	if (adev->flags & AMD_IS_PX)
225 		return true;
226 	return false;
227 }
228 
229 /**
230  * amdgpu_device_supports_baco - Does the device support BACO
231  *
232  * @dev: drm_device pointer
233  *
234  * Returns true if the device supports BACO,
235  * otherwise return false.
236  */
237 bool amdgpu_device_supports_baco(struct drm_device *dev)
238 {
239 	struct amdgpu_device *adev = drm_to_adev(dev);
240 
241 	return amdgpu_asic_supports_baco(adev);
242 }
243 
244 /**
245  * VRAM access helper functions.
246  *
247  * amdgpu_device_vram_access - read/write a buffer in vram
248  *
249  * @adev: amdgpu_device pointer
250  * @pos: offset of the buffer in vram
251  * @buf: virtual address of the buffer in system memory
252  * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
253  * @write: true - write to vram, otherwise - read from vram
254  */
255 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
256 			       uint32_t *buf, size_t size, bool write)
257 {
258 	unsigned long flags;
259 	uint32_t hi = ~0;
260 	uint64_t last;
261 
262 
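	/* Fast path: if the range lies within the CPU-visible VRAM aperture,
	 * copy through the BAR mapping directly; anything left over falls
	 * through to the indexed MM_INDEX/MM_DATA path below.
	 */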
263 #ifdef CONFIG_64BIT
264 	last = min(pos + size, adev->gmc.visible_vram_size);
265 	if (last > pos) {
266 		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
267 		size_t count = last - pos;
268 
269 		if (write) {
270 			memcpy_toio(addr, buf, count);
271 			mb();
272 			amdgpu_asic_flush_hdp(adev, NULL);
273 		} else {
274 			amdgpu_asic_invalidate_hdp(adev, NULL);
275 			mb();
276 			memcpy_fromio(buf, addr, count);
277 		}
278 
279 		if (count == size)
280 			return;
281 
282 		pos += count;
283 		buf += count / 4;
284 		size -= count;
285 	}
286 #endif
287 
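	/* Slow path: access VRAM one dword at a time through the indexed
	 * MM_INDEX/MM_INDEX_HI/MM_DATA registers under the MMIO index lock.
	 */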
288 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
289 	for (last = pos + size; pos < last; pos += 4) {
290 		uint32_t tmp = pos >> 31;
291 
292 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
293 		if (tmp != hi) {
294 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
295 			hi = tmp;
296 		}
297 		if (write)
298 			WREG32_NO_KIQ(mmMM_DATA, *buf++);
299 		else
300 			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
301 	}
302 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
303 }
304 
305 /*
306  * MMIO register access helper functions.
307  */
308 /**
309  * amdgpu_mm_rreg - read a memory mapped IO register
310  *
311  * @adev: amdgpu_device pointer
312  * @reg: dword aligned register offset
313  * @acc_flags: access flags which require special behavior
314  *
315  * Returns the 32 bit value from the offset specified.
316  */
317 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
318 			uint32_t acc_flags)
319 {
320 	uint32_t ret;
321 
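	/* When running as an SR-IOV VF at runtime, read the register through
	 * the KIQ ring instead of plain MMIO, unless the caller asked to
	 * bypass KIQ or a GPU reset is currently in progress.
	 */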
322 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) &&
323 	    down_read_trylock(&adev->reset_sem)) {
324 		ret = amdgpu_kiq_rreg(adev, reg);
325 		up_read(&adev->reset_sem);
326 		return ret;
327 	}
328 
329 	if ((reg * 4) < adev->rmmio_size)
330 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
331 	else {
332 		unsigned long flags;
333 
334 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
335 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
336 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
337 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
338 	}
339 
340 	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
341 	return ret;
342 }
343 
344 /*
345  * MMIO register read with byte access helper function
346  * @offset: byte offset from MMIO start
347  *
348  */
349 
350 /**
351  * amdgpu_mm_rreg8 - read a memory mapped IO register
352  *
353  * @adev: amdgpu_device pointer
354  * @offset: byte aligned register offset
355  *
356  * Returns the 8 bit value from the offset specified.
357  */
358 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
359 	if (offset < adev->rmmio_size)
360 		return (readb(adev->rmmio + offset));
361 	BUG();
362 }
363 
364 /*
365  * MMIO register write with byte access helper function
366  * @offset: byte offset from MMIO start
367  * @value: the value to be written to the register
368  *
369  */
370 /**
371  * amdgpu_mm_wreg8 - write to a memory mapped IO register
372  *
373  * @adev: amdgpu_device pointer
374  * @offset: byte aligned register offset
375  * @value: 8 bit value to write
376  *
377  * Writes the value specified to the offset specified.
378  */
379 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
380 	if (offset < adev->rmmio_size)
381 		writeb(value, adev->rmmio + offset);
382 	else
383 		BUG();
384 }
385 
386 static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev,
387 				       uint32_t reg, uint32_t v,
388 				       uint32_t acc_flags)
389 {
390 	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
391 
392 	if ((reg * 4) < adev->rmmio_size)
393 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
394 	else {
395 		unsigned long flags;
396 
397 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
398 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
399 		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
400 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
401 	}
402 }
403 
404 /**
405  * amdgpu_mm_wreg - write to a memory mapped IO register
406  *
407  * @adev: amdgpu_device pointer
408  * @reg: dword aligned register offset
409  * @v: 32 bit value to write to the register
410  * @acc_flags: access flags which require special behavior
411  *
412  * Writes the value specified to the offset specified.
413  */
414 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
415 		    uint32_t acc_flags)
416 {
417 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) &&
418 	    down_read_trylock(&adev->reset_sem)) {
419 		amdgpu_kiq_wreg(adev, reg, v);
420 		up_read(&adev->reset_sem);
421 		return;
422 	}
423 
424 	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
425 }
426 
427 /*
428  * amdgpu_mm_wreg_mmio_rlc - write a register either via MMIO or via the RLC path if it is in range
429  *
430  * This function is invoked only for debugfs register access.
431  */
432 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
433 		    uint32_t acc_flags)
434 {
435 	if (amdgpu_sriov_fullaccess(adev) &&
436 		adev->gfx.rlc.funcs &&
437 		adev->gfx.rlc.funcs->is_rlcg_access_range) {
438 
439 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
440 			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
441 	}
442 
443 	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
444 }
445 
446 /**
447  * amdgpu_io_rreg - read an IO register
448  *
449  * @adev: amdgpu_device pointer
450  * @reg: dword aligned register offset
451  *
452  * Returns the 32 bit value from the offset specified.
453  */
454 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
455 {
456 	if ((reg * 4) < adev->rio_mem_size)
457 		return ioread32(adev->rio_mem + (reg * 4));
458 	else {
459 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
460 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
461 	}
462 }
463 
464 /**
465  * amdgpu_io_wreg - write to an IO register
466  *
467  * @adev: amdgpu_device pointer
468  * @reg: dword aligned register offset
469  * @v: 32 bit value to write to the register
470  *
471  * Writes the value specified to the offset specified.
472  */
473 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
474 {
475 	if ((reg * 4) < adev->rio_mem_size)
476 		iowrite32(v, adev->rio_mem + (reg * 4));
477 	else {
478 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
479 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
480 	}
481 }
482 
483 /**
484  * amdgpu_mm_rdoorbell - read a doorbell dword
485  *
486  * @adev: amdgpu_device pointer
487  * @index: doorbell index
488  *
489  * Returns the value in the doorbell aperture at the
490  * requested doorbell index (CIK).
491  */
492 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
493 {
494 	if (index < adev->doorbell.num_doorbells) {
495 		return readl(adev->doorbell.ptr + index);
496 	} else {
497 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
498 		return 0;
499 	}
500 }
501 
502 /**
503  * amdgpu_mm_wdoorbell - write a doorbell dword
504  *
505  * @adev: amdgpu_device pointer
506  * @index: doorbell index
507  * @v: value to write
508  *
509  * Writes @v to the doorbell aperture at the
510  * requested doorbell index (CIK).
511  */
512 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
513 {
514 	if (index < adev->doorbell.num_doorbells) {
515 		writel(v, adev->doorbell.ptr + index);
516 	} else {
517 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
518 	}
519 }
520 
521 /**
522  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
523  *
524  * @adev: amdgpu_device pointer
525  * @index: doorbell index
526  *
527  * Returns the value in the doorbell aperture at the
528  * requested doorbell index (VEGA10+).
529  */
530 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
531 {
532 	if (index < adev->doorbell.num_doorbells) {
533 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
534 	} else {
535 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
536 		return 0;
537 	}
538 }
539 
540 /**
541  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
542  *
543  * @adev: amdgpu_device pointer
544  * @index: doorbell index
545  * @v: value to write
546  *
547  * Writes @v to the doorbell aperture at the
548  * requested doorbell index (VEGA10+).
549  */
550 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
551 {
552 	if (index < adev->doorbell.num_doorbells) {
553 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
554 	} else {
555 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
556 	}
557 }
558 
559 /**
560  * amdgpu_invalid_rreg - dummy reg read function
561  *
562  * @adev: amdgpu device pointer
563  * @reg: offset of register
564  *
565  * Dummy register read function.  Used for register blocks
566  * that certain asics don't have (all asics).
567  * Returns the value in the register.
568  */
569 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
570 {
571 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
572 	BUG();
573 	return 0;
574 }
575 
576 /**
577  * amdgpu_invalid_wreg - dummy reg write function
578  *
579  * @adev: amdgpu device pointer
580  * @reg: offset of register
581  * @v: value to write to the register
582  *
583  * Dummy register write function.  Used for register blocks
584  * that certain asics don't have (all asics).
585  */
586 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
587 {
588 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
589 		  reg, v);
590 	BUG();
591 }
592 
593 /**
594  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
595  *
596  * @adev: amdgpu device pointer
597  * @reg: offset of register
598  *
599  * Dummy register read function.  Used for register blocks
600  * that certain asics don't have (all asics).
601  * Returns the value in the register.
602  */
603 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
604 {
605 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
606 	BUG();
607 	return 0;
608 }
609 
610 /**
611  * amdgpu_invalid_wreg64 - dummy reg write function
612  *
613  * @adev: amdgpu device pointer
614  * @reg: offset of register
615  * @v: value to write to the register
616  *
617  * Dummy register write function.  Used for register blocks
618  * that certain asics don't have (all asics).
619  */
620 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
621 {
622 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
623 		  reg, v);
624 	BUG();
625 }
626 
627 /**
628  * amdgpu_block_invalid_rreg - dummy reg read function
629  *
630  * @adev: amdgpu device pointer
631  * @block: offset of instance
632  * @reg: offset of register
633  *
634  * Dummy register read function.  Used for register blocks
635  * that certain asics don't have (all asics).
636  * Returns the value in the register.
637  */
638 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
639 					  uint32_t block, uint32_t reg)
640 {
641 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
642 		  reg, block);
643 	BUG();
644 	return 0;
645 }
646 
647 /**
648  * amdgpu_block_invalid_wreg - dummy reg write function
649  *
650  * @adev: amdgpu device pointer
651  * @block: offset of instance
652  * @reg: offset of register
653  * @v: value to write to the register
654  *
655  * Dummy register write function.  Used for register blocks
656  * that certain asics don't have (all asics).
657  */
658 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
659 				      uint32_t block,
660 				      uint32_t reg, uint32_t v)
661 {
662 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
663 		  reg, block, v);
664 	BUG();
665 }
666 
667 /**
668  * amdgpu_device_asic_init - Wrapper for atom asic_init
669  *
670  * @adev: amdgpu_device pointer
671  *
672  * Does any asic specific work and then calls atom asic init.
673  */
674 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
675 {
676 	amdgpu_asic_pre_asic_init(adev);
677 
678 	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
679 }
680 
681 /**
682  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Allocates a scratch page of VRAM for use by various things in the
687  * driver.
688  */
689 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
690 {
691 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
692 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
693 				       &adev->vram_scratch.robj,
694 				       &adev->vram_scratch.gpu_addr,
695 				       (void **)&adev->vram_scratch.ptr);
696 }
697 
698 /**
699  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
700  *
701  * @adev: amdgpu device pointer
702  *
703  * Frees the VRAM scratch page.
704  */
705 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
706 {
707 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
708 }
709 
710 /**
711  * amdgpu_device_program_register_sequence - program an array of registers.
712  *
713  * @adev: amdgpu_device pointer
714  * @registers: pointer to the register array
715  * @array_size: size of the register array
716  *
717  * Programs an array of registers with AND and OR masks.
718  * This is a helper for setting golden registers.
719  */
720 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
721 					     const u32 *registers,
722 					     const u32 array_size)
723 {
724 	u32 tmp, reg, and_mask, or_mask;
725 	int i;
726 
727 	if (array_size % 3)
728 		return;
729 
730 	for (i = 0; i < array_size; i += 3) {
731 		reg = registers[i + 0];
732 		and_mask = registers[i + 1];
733 		or_mask = registers[i + 2];
734 
735 		if (and_mask == 0xffffffff) {
736 			tmp = or_mask;
737 		} else {
738 			tmp = RREG32(reg);
739 			tmp &= ~and_mask;
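			/* On AMDGPU_FAMILY_AI and newer only the bits selected by
			 * and_mask take the or_mask value; older families OR in
			 * the full or_mask.
			 */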
740 			if (adev->family >= AMDGPU_FAMILY_AI)
741 				tmp |= (or_mask & and_mask);
742 			else
743 				tmp |= or_mask;
744 		}
745 		WREG32(reg, tmp);
746 	}
747 }
748 
749 /**
750  * amdgpu_device_pci_config_reset - reset the GPU
751  *
752  * @adev: amdgpu_device pointer
753  *
754  * Resets the GPU using the pci config reset sequence.
755  * Only applicable to asics prior to vega10.
756  */
757 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
758 {
759 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
760 }
761 
762 /*
763  * GPU doorbell aperture helpers function.
764  */
765 /**
766  * amdgpu_device_doorbell_init - Init doorbell driver information.
767  *
768  * @adev: amdgpu_device pointer
769  *
770  * Init doorbell driver information (CIK)
771  * Returns 0 on success, error on failure.
772  */
773 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
774 {
775 
776 	/* No doorbell on SI hardware generation */
777 	if (adev->asic_type < CHIP_BONAIRE) {
778 		adev->doorbell.base = 0;
779 		adev->doorbell.size = 0;
780 		adev->doorbell.num_doorbells = 0;
781 		adev->doorbell.ptr = NULL;
782 		return 0;
783 	}
784 
785 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
786 		return -EINVAL;
787 
788 	amdgpu_asic_init_doorbell_index(adev);
789 
790 	/* doorbell bar mapping */
791 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
792 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
793 
794 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
795 					     adev->doorbell_index.max_assignment+1);
796 	if (adev->doorbell.num_doorbells == 0)
797 		return -EINVAL;
798 
799 	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
800 	 * paging queue doorbell uses the second page. The
801 	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
802 	 * doorbells are in the first page. So with the paging queue enabled,
803 	 * max num_doorbells should be extended by one page (0x400 in dwords).
804 	 */
805 	if (adev->asic_type >= CHIP_VEGA10)
806 		adev->doorbell.num_doorbells += 0x400;
807 
808 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
809 				     adev->doorbell.num_doorbells *
810 				     sizeof(u32));
811 	if (adev->doorbell.ptr == NULL)
812 		return -ENOMEM;
813 
814 	return 0;
815 }
816 
817 /**
818  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
819  *
820  * @adev: amdgpu_device pointer
821  *
822  * Tear down doorbell driver information (CIK)
823  */
824 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
825 {
826 	iounmap(adev->doorbell.ptr);
827 	adev->doorbell.ptr = NULL;
828 }
829 
830 
831 
832 /*
833  * amdgpu_device_wb_*()
834  * Writeback is the method by which the GPU updates special pages in memory
835  * with the status of certain GPU events (fences, ring pointers, etc.).
836  */
837 
838 /**
839  * amdgpu_device_wb_fini - Disable Writeback and free memory
840  *
841  * @adev: amdgpu_device pointer
842  *
843  * Disables Writeback and frees the Writeback memory (all asics).
844  * Used at driver shutdown.
845  */
846 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
847 {
848 	if (adev->wb.wb_obj) {
849 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
850 				      &adev->wb.gpu_addr,
851 				      (void **)&adev->wb.wb);
852 		adev->wb.wb_obj = NULL;
853 	}
854 }
855 
856 /**
857  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
858  *
859  * @adev: amdgpu_device pointer
860  *
861  * Initializes writeback and allocates writeback memory (all asics).
862  * Used at driver startup.
863  * Returns 0 on success or a negative error code on failure.
864  */
865 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
866 {
867 	int r;
868 
869 	if (adev->wb.wb_obj == NULL) {
870 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
871 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
872 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
873 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
874 					    (void **)&adev->wb.wb);
875 		if (r) {
876 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
877 			return r;
878 		}
879 
880 		adev->wb.num_wb = AMDGPU_MAX_WB;
881 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
882 
883 		/* clear wb memory */
884 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
885 	}
886 
887 	return 0;
888 }
889 
890 /**
891  * amdgpu_device_wb_get - Allocate a wb entry
892  *
893  * @adev: amdgpu_device pointer
894  * @wb: wb index
895  *
896  * Allocate a wb slot for use by the driver (all asics).
897  * Returns 0 on success or -EINVAL on failure.
898  */
899 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
900 {
901 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
902 
903 	if (offset < adev->wb.num_wb) {
904 		__set_bit(offset, adev->wb.used);
905 		*wb = offset << 3; /* convert to dw offset */
906 		return 0;
907 	} else {
908 		return -EINVAL;
909 	}
910 }
911 
912 /**
913  * amdgpu_device_wb_free - Free a wb entry
914  *
915  * @adev: amdgpu_device pointer
916  * @wb: wb index
917  *
918  * Free a wb slot allocated for use by the driver (all asics)
919  */
920 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
921 {
922 	wb >>= 3;
923 	if (wb < adev->wb.num_wb)
924 		__clear_bit(wb, adev->wb.used);
925 }
926 
927 /**
928  * amdgpu_device_resize_fb_bar - try to resize FB BAR
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
933  * to fail, but if any of the BARs is not accessible after the resize we abort
934  * driver loading by returning -ENODEV.
935  */
936 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
937 {
938 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
939 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
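	/* rbar_size is the rounded-up VRAM size expressed in the resizable BAR
	 * size encoding that pci_resize_resource() expects.
	 */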
940 	struct pci_bus *root;
941 	struct resource *res;
942 	unsigned i;
943 	u16 cmd;
944 	int r;
945 
946 	/* Bypass for VF */
947 	if (amdgpu_sriov_vf(adev))
948 		return 0;
949 
950 	/* skip if the bios has already enabled large BAR */
951 	if (adev->gmc.real_vram_size &&
952 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
953 		return 0;
954 
955 	/* Check if the root BUS has 64bit memory resources */
956 	root = adev->pdev->bus;
957 	while (root->parent)
958 		root = root->parent;
959 
960 	pci_bus_for_each_resource(root, res, i) {
961 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
962 		    res->start > 0x100000000ull)
963 			break;
964 	}
965 
966 	/* Trying to resize is pointless without a root hub window above 4GB */
967 	if (!res)
968 		return 0;
969 
970 	/* Disable memory decoding while we change the BAR addresses and size */
971 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
972 	pci_write_config_word(adev->pdev, PCI_COMMAND,
973 			      cmd & ~PCI_COMMAND_MEMORY);
974 
975 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
976 	amdgpu_device_doorbell_fini(adev);
977 	if (adev->asic_type >= CHIP_BONAIRE)
978 		pci_release_resource(adev->pdev, 2);
979 
980 	pci_release_resource(adev->pdev, 0);
981 
982 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
983 	if (r == -ENOSPC)
984 		DRM_INFO("Not enough PCI address space for a large BAR.");
985 	else if (r && r != -ENOTSUPP)
986 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
987 
988 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
989 
990 	/* When the doorbell or fb BAR isn't available we have no chance of
991 	 * using the device.
992 	 */
993 	r = amdgpu_device_doorbell_init(adev);
994 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
995 		return -ENODEV;
996 
997 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
998 
999 	return 0;
1000 }
1001 
1002 /*
1003  * GPU helpers function.
1004  */
1005 /**
1006  * amdgpu_device_need_post - check if the hw needs to be posted or not
1007  *
1008  * @adev: amdgpu_device pointer
1009  *
1010  * Check if the asic has been initialized (all asics) at driver startup,
1011  * or if post is needed because a hw reset was performed.
1012  * Returns true if need or false if not.
1013  */
1014 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1015 {
1016 	uint32_t reg;
1017 
1018 	if (amdgpu_sriov_vf(adev))
1019 		return false;
1020 
1021 	if (amdgpu_passthrough(adev)) {
1022 		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
1023 		 * reboot some old SMC firmware still needs the driver to do a vPost,
1024 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1025 		 * this flaw, so we force a vPost for SMC versions below 22.15.
1026 		 */
1027 		if (adev->asic_type == CHIP_FIJI) {
1028 			int err;
1029 			uint32_t fw_ver;
1030 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1031 			/* force vPost if error occurred */
1032 			if (err)
1033 				return true;
1034 
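			/* the SMC firmware version is read from dword offset 69 of the image */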
1035 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1036 			if (fw_ver < 0x00160e00)
1037 				return true;
1038 		}
1039 	}
1040 
1041 	if (adev->has_hw_reset) {
1042 		adev->has_hw_reset = false;
1043 		return true;
1044 	}
1045 
1046 	/* bios scratch used on CIK+ */
1047 	if (adev->asic_type >= CHIP_BONAIRE)
1048 		return amdgpu_atombios_scratch_need_asic_init(adev);
1049 
1050 	/* check MEM_SIZE for older asics */
1051 	reg = amdgpu_asic_get_config_memsize(adev);
1052 
1053 	if ((reg != 0) && (reg != 0xffffffff))
1054 		return false;
1055 
1056 	return true;
1057 }
1058 
1059 /* if we get transitioned to only one device, take VGA back */
1060 /**
1061  * amdgpu_device_vga_set_decode - enable/disable vga decode
1062  *
1063  * @cookie: amdgpu_device pointer
1064  * @state: enable/disable vga decode
1065  *
1066  * Enable/disable vga decode (all asics).
1067  * Returns VGA resource flags.
1068  */
1069 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1070 {
1071 	struct amdgpu_device *adev = cookie;
1072 	amdgpu_asic_set_vga_state(adev, state);
1073 	if (state)
1074 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1075 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1076 	else
1077 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1078 }
1079 
1080 /**
1081  * amdgpu_device_check_block_size - validate the vm block size
1082  *
1083  * @adev: amdgpu_device pointer
1084  *
1085  * Validates the vm block size specified via module parameter.
1086  * The vm block size defines the number of bits in the page table versus the page directory:
1087  * a page is 4KB so we have 12 bits of offset, a minimum of 9 bits in the
1088  * page table and the remaining bits in the page directory.
1089  */
1090 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1091 {
1092 	/* defines number of bits in page table versus page directory,
1093 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1094 	 * page table and the remaining bits are in the page directory */
1095 	if (amdgpu_vm_block_size == -1)
1096 		return;
1097 
1098 	if (amdgpu_vm_block_size < 9) {
1099 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1100 			 amdgpu_vm_block_size);
1101 		amdgpu_vm_block_size = -1;
1102 	}
1103 }
1104 
1105 /**
1106  * amdgpu_device_check_vm_size - validate the vm size
1107  *
1108  * @adev: amdgpu_device pointer
1109  *
1110  * Validates the vm size in GB specified via module parameter.
1111  * The VM size is the size of the GPU virtual memory space in GB.
1112  */
1113 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1114 {
1115 	/* no need to check the default value */
1116 	if (amdgpu_vm_size == -1)
1117 		return;
1118 
1119 	if (amdgpu_vm_size < 1) {
1120 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1121 			 amdgpu_vm_size);
1122 		amdgpu_vm_size = -1;
1123 	}
1124 }
1125 
1126 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1127 {
1128 	struct sysinfo si;
1129 	bool is_os_64 = (sizeof(void *) == 8);
1130 	uint64_t total_memory;
1131 	uint64_t dram_size_seven_GB = 0x1B8000000;
1132 	uint64_t dram_size_three_GB = 0xB8000000;
1133 
1134 	if (amdgpu_smu_memory_pool_size == 0)
1135 		return;
1136 
1137 	if (!is_os_64) {
1138 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1139 		goto def_value;
1140 	}
1141 	si_meminfo(&si);
1142 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1143 
1144 	if ((amdgpu_smu_memory_pool_size == 1) ||
1145 		(amdgpu_smu_memory_pool_size == 2)) {
1146 		if (total_memory < dram_size_three_GB)
1147 			goto def_value1;
1148 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1149 		(amdgpu_smu_memory_pool_size == 8)) {
1150 		if (total_memory < dram_size_seven_GB)
1151 			goto def_value1;
1152 	} else {
1153 		DRM_WARN("Smu memory pool size not supported\n");
1154 		goto def_value;
1155 	}
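	/* amdgpu_smu_memory_pool_size is in units of 256MB (1 << 28 bytes) */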
1156 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1157 
1158 	return;
1159 
1160 def_value1:
1161 	DRM_WARN("Not enough system memory\n");
1162 def_value:
1163 	adev->pm.smu_prv_buffer_size = 0;
1164 }
1165 
1166 /**
1167  * amdgpu_device_check_arguments - validate module params
1168  *
1169  * @adev: amdgpu_device pointer
1170  *
1171  * Validates certain module parameters and updates
1172  * the associated values used by the driver (all asics).
1173  */
1174 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1175 {
1176 	if (amdgpu_sched_jobs < 4) {
1177 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1178 			 amdgpu_sched_jobs);
1179 		amdgpu_sched_jobs = 4;
1180 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1181 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1182 			 amdgpu_sched_jobs);
1183 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1184 	}
1185 
1186 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1187 		/* gart size must be greater or equal to 32M */
1188 		dev_warn(adev->dev, "gart size (%d) too small\n",
1189 			 amdgpu_gart_size);
1190 		amdgpu_gart_size = -1;
1191 	}
1192 
1193 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1194 		/* gtt size must be greater or equal to 32M */
1195 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1196 				 amdgpu_gtt_size);
1197 		amdgpu_gtt_size = -1;
1198 	}
1199 
1200 	/* valid range is between 4 and 9 inclusive */
1201 	if (amdgpu_vm_fragment_size != -1 &&
1202 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1203 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1204 		amdgpu_vm_fragment_size = -1;
1205 	}
1206 
1207 	if (amdgpu_sched_hw_submission < 2) {
1208 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1209 			 amdgpu_sched_hw_submission);
1210 		amdgpu_sched_hw_submission = 2;
1211 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1212 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1213 			 amdgpu_sched_hw_submission);
1214 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1215 	}
1216 
1217 	amdgpu_device_check_smu_prv_buffer_size(adev);
1218 
1219 	amdgpu_device_check_vm_size(adev);
1220 
1221 	amdgpu_device_check_block_size(adev);
1222 
1223 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1224 
1225 	amdgpu_gmc_tmz_set(adev);
1226 
1227 	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1228 		amdgpu_num_kcq = 8;
1229 		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 /**
1236  * amdgpu_switcheroo_set_state - set switcheroo state
1237  *
1238  * @pdev: pci dev pointer
1239  * @state: vga_switcheroo state
1240  *
1241  * Callback for the switcheroo driver.  Suspends or resumes the
1242  * asic before or after it is powered up using ACPI methods.
1243  */
1244 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1245 					enum vga_switcheroo_state state)
1246 {
1247 	struct drm_device *dev = pci_get_drvdata(pdev);
1248 	int r;
1249 
1250 	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1251 		return;
1252 
1253 	if (state == VGA_SWITCHEROO_ON) {
1254 		pr_info("switched on\n");
1255 		/* don't suspend or resume card normally */
1256 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1257 
1258 		pci_set_power_state(dev->pdev, PCI_D0);
1259 		pci_restore_state(dev->pdev);
1260 		r = pci_enable_device(dev->pdev);
1261 		if (r)
1262 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1263 		amdgpu_device_resume(dev, true);
1264 
1265 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1266 		drm_kms_helper_poll_enable(dev);
1267 	} else {
1268 		pr_info("switched off\n");
1269 		drm_kms_helper_poll_disable(dev);
1270 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1271 		amdgpu_device_suspend(dev, true);
1272 		pci_save_state(dev->pdev);
1273 		/* Shut down the device */
1274 		pci_disable_device(dev->pdev);
1275 		pci_set_power_state(dev->pdev, PCI_D3cold);
1276 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1277 	}
1278 }
1279 
1280 /**
1281  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1282  *
1283  * @pdev: pci dev pointer
1284  *
1285  * Callback for the switcheroo driver.  Check if the switcheroo
1286  * state can be changed.
1287  * Returns true if the state can be changed, false if not.
1288  */
1289 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1290 {
1291 	struct drm_device *dev = pci_get_drvdata(pdev);
1292 
1293 	/*
1294 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1295 	* locking inversion with the driver load path. And the access here is
1296 	* completely racy anyway. So don't bother with locking for now.
1297 	*/
1298 	return atomic_read(&dev->open_count) == 0;
1299 }
1300 
1301 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1302 	.set_gpu_state = amdgpu_switcheroo_set_state,
1303 	.reprobe = NULL,
1304 	.can_switch = amdgpu_switcheroo_can_switch,
1305 };
1306 
1307 /**
1308  * amdgpu_device_ip_set_clockgating_state - set the CG state
1309  *
1310  * @dev: amdgpu_device pointer
1311  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1312  * @state: clockgating state (gate or ungate)
1313  *
1314  * Sets the requested clockgating state for all instances of
1315  * the hardware IP specified.
1316  * Returns the error code from the last instance.
1317  */
1318 int amdgpu_device_ip_set_clockgating_state(void *dev,
1319 					   enum amd_ip_block_type block_type,
1320 					   enum amd_clockgating_state state)
1321 {
1322 	struct amdgpu_device *adev = dev;
1323 	int i, r = 0;
1324 
1325 	for (i = 0; i < adev->num_ip_blocks; i++) {
1326 		if (!adev->ip_blocks[i].status.valid)
1327 			continue;
1328 		if (adev->ip_blocks[i].version->type != block_type)
1329 			continue;
1330 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1331 			continue;
1332 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1333 			(void *)adev, state);
1334 		if (r)
1335 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1336 				  adev->ip_blocks[i].version->funcs->name, r);
1337 	}
1338 	return r;
1339 }
1340 
1341 /**
1342  * amdgpu_device_ip_set_powergating_state - set the PG state
1343  *
1344  * @dev: amdgpu_device pointer
1345  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1346  * @state: powergating state (gate or ungate)
1347  *
1348  * Sets the requested powergating state for all instances of
1349  * the hardware IP specified.
1350  * Returns the error code from the last instance.
1351  */
1352 int amdgpu_device_ip_set_powergating_state(void *dev,
1353 					   enum amd_ip_block_type block_type,
1354 					   enum amd_powergating_state state)
1355 {
1356 	struct amdgpu_device *adev = dev;
1357 	int i, r = 0;
1358 
1359 	for (i = 0; i < adev->num_ip_blocks; i++) {
1360 		if (!adev->ip_blocks[i].status.valid)
1361 			continue;
1362 		if (adev->ip_blocks[i].version->type != block_type)
1363 			continue;
1364 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1365 			continue;
1366 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1367 			(void *)adev, state);
1368 		if (r)
1369 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1370 				  adev->ip_blocks[i].version->funcs->name, r);
1371 	}
1372 	return r;
1373 }
1374 
1375 /**
1376  * amdgpu_device_ip_get_clockgating_state - get the CG state
1377  *
1378  * @adev: amdgpu_device pointer
1379  * @flags: clockgating feature flags
1380  *
1381  * Walks the list of IPs on the device and updates the clockgating
1382  * flags for each IP.
1383  * Updates @flags with the feature flags for each hardware IP where
1384  * clockgating is enabled.
1385  */
1386 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1387 					    u32 *flags)
1388 {
1389 	int i;
1390 
1391 	for (i = 0; i < adev->num_ip_blocks; i++) {
1392 		if (!adev->ip_blocks[i].status.valid)
1393 			continue;
1394 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1395 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1396 	}
1397 }
1398 
1399 /**
1400  * amdgpu_device_ip_wait_for_idle - wait for idle
1401  *
1402  * @adev: amdgpu_device pointer
1403  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1404  *
1405  * Waits for the requested hardware IP to be idle.
1406  * Returns 0 for success or a negative error code on failure.
1407  */
1408 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1409 				   enum amd_ip_block_type block_type)
1410 {
1411 	int i, r;
1412 
1413 	for (i = 0; i < adev->num_ip_blocks; i++) {
1414 		if (!adev->ip_blocks[i].status.valid)
1415 			continue;
1416 		if (adev->ip_blocks[i].version->type == block_type) {
1417 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1418 			if (r)
1419 				return r;
1420 			break;
1421 		}
1422 	}
1423 	return 0;
1424 
1425 }
1426 
1427 /**
1428  * amdgpu_device_ip_is_idle - is the hardware IP idle
1429  *
1430  * @adev: amdgpu_device pointer
1431  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1432  *
1433  * Check if the hardware IP is idle or not.
1434  * Returns true if the IP is idle, false if not.
1435  */
1436 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1437 			      enum amd_ip_block_type block_type)
1438 {
1439 	int i;
1440 
1441 	for (i = 0; i < adev->num_ip_blocks; i++) {
1442 		if (!adev->ip_blocks[i].status.valid)
1443 			continue;
1444 		if (adev->ip_blocks[i].version->type == block_type)
1445 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1446 	}
1447 	return true;
1448 
1449 }
1450 
1451 /**
1452  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1453  *
1454  * @adev: amdgpu_device pointer
1455  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1456  *
1457  * Returns a pointer to the hardware IP block structure
1458  * if it exists for the asic, otherwise NULL.
1459  */
1460 struct amdgpu_ip_block *
1461 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1462 			      enum amd_ip_block_type type)
1463 {
1464 	int i;
1465 
1466 	for (i = 0; i < adev->num_ip_blocks; i++)
1467 		if (adev->ip_blocks[i].version->type == type)
1468 			return &adev->ip_blocks[i];
1469 
1470 	return NULL;
1471 }
1472 
1473 /**
1474  * amdgpu_device_ip_block_version_cmp
1475  *
1476  * @adev: amdgpu_device pointer
1477  * @type: enum amd_ip_block_type
1478  * @major: major version
1479  * @minor: minor version
1480  *
1481  * return 0 if equal or greater
1482  * return 1 if smaller or the ip_block doesn't exist
1483  */
1484 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1485 				       enum amd_ip_block_type type,
1486 				       u32 major, u32 minor)
1487 {
1488 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1489 
1490 	if (ip_block && ((ip_block->version->major > major) ||
1491 			((ip_block->version->major == major) &&
1492 			(ip_block->version->minor >= minor))))
1493 		return 0;
1494 
1495 	return 1;
1496 }
1497 
1498 /**
1499  * amdgpu_device_ip_block_add
1500  *
1501  * @adev: amdgpu_device pointer
1502  * @ip_block_version: pointer to the IP to add
1503  *
1504  * Adds the IP block driver information to the collection of IPs
1505  * on the asic.
1506  */
1507 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1508 			       const struct amdgpu_ip_block_version *ip_block_version)
1509 {
1510 	if (!ip_block_version)
1511 		return -EINVAL;
1512 
1513 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1514 		  ip_block_version->funcs->name);
1515 
1516 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1517 
1518 	return 0;
1519 }
1520 
1521 /**
1522  * amdgpu_device_enable_virtual_display - enable virtual display feature
1523  *
1524  * @adev: amdgpu_device pointer
1525  *
1526  * Enables the virtual display feature if the user has enabled it via
1527  * the module parameter virtual_display.  This feature provides a virtual
1528  * display hardware on headless boards or in virtualized environments.
1529  * This function parses and validates the configuration string specified by
1530  * the user and configures the virtual display configuration (number of
1531  * virtual connectors, crtcs, etc.) specified.
1532  */
1533 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1534 {
1535 	adev->enable_virtual_display = false;
1536 
1537 	if (amdgpu_virtual_display) {
1538 		struct drm_device *ddev = adev_to_drm(adev);
1539 		const char *pci_address_name = pci_name(ddev->pdev);
1540 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1541 
1542 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1543 		pciaddstr_tmp = pciaddstr;
1544 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1545 			pciaddname = strsep(&pciaddname_tmp, ",");
1546 			if (!strcmp("all", pciaddname)
1547 			    || !strcmp(pci_address_name, pciaddname)) {
1548 				long num_crtc;
1549 				int res = -1;
1550 
1551 				adev->enable_virtual_display = true;
1552 
1553 				if (pciaddname_tmp)
1554 					res = kstrtol(pciaddname_tmp, 10,
1555 						      &num_crtc);
1556 
1557 				if (!res) {
1558 					if (num_crtc < 1)
1559 						num_crtc = 1;
1560 					if (num_crtc > 6)
1561 						num_crtc = 6;
1562 					adev->mode_info.num_crtc = num_crtc;
1563 				} else {
1564 					adev->mode_info.num_crtc = 1;
1565 				}
1566 				break;
1567 			}
1568 		}
1569 
1570 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1571 			 amdgpu_virtual_display, pci_address_name,
1572 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1573 
1574 		kfree(pciaddstr);
1575 	}
1576 }
1577 
1578 /**
1579  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1580  *
1581  * @adev: amdgpu_device pointer
1582  *
1583  * Parses the asic configuration parameters specified in the gpu info
1584  * firmware and makes them available to the driver for use in configuring
1585  * the asic.
1586  * Returns 0 on success, -EINVAL on failure.
1587  */
1588 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1589 {
1590 	const char *chip_name;
1591 	char fw_name[40];
1592 	int err;
1593 	const struct gpu_info_firmware_header_v1_0 *hdr;
1594 
1595 	adev->firmware.gpu_info_fw = NULL;
1596 
1597 	if (adev->mman.discovery_bin) {
1598 		amdgpu_discovery_get_gfx_info(adev);
1599 
1600 		/*
1601 		 * FIXME: The bounding box is still needed by Navi12, so
1602 		 * temporarily read it from gpu_info firmware. Should be dropped
1603 		 * when DAL no longer needs it.
1604 		 */
1605 		if (adev->asic_type != CHIP_NAVI12)
1606 			return 0;
1607 	}
1608 
1609 	switch (adev->asic_type) {
1610 #ifdef CONFIG_DRM_AMDGPU_SI
1611 	case CHIP_VERDE:
1612 	case CHIP_TAHITI:
1613 	case CHIP_PITCAIRN:
1614 	case CHIP_OLAND:
1615 	case CHIP_HAINAN:
1616 #endif
1617 #ifdef CONFIG_DRM_AMDGPU_CIK
1618 	case CHIP_BONAIRE:
1619 	case CHIP_HAWAII:
1620 	case CHIP_KAVERI:
1621 	case CHIP_KABINI:
1622 	case CHIP_MULLINS:
1623 #endif
1624 	case CHIP_TOPAZ:
1625 	case CHIP_TONGA:
1626 	case CHIP_FIJI:
1627 	case CHIP_POLARIS10:
1628 	case CHIP_POLARIS11:
1629 	case CHIP_POLARIS12:
1630 	case CHIP_VEGAM:
1631 	case CHIP_CARRIZO:
1632 	case CHIP_STONEY:
1633 	case CHIP_VEGA20:
1634 	default:
1635 		return 0;
1636 	case CHIP_VEGA10:
1637 		chip_name = "vega10";
1638 		break;
1639 	case CHIP_VEGA12:
1640 		chip_name = "vega12";
1641 		break;
1642 	case CHIP_RAVEN:
1643 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1644 			chip_name = "raven2";
1645 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1646 			chip_name = "picasso";
1647 		else
1648 			chip_name = "raven";
1649 		break;
1650 	case CHIP_ARCTURUS:
1651 		chip_name = "arcturus";
1652 		break;
1653 	case CHIP_RENOIR:
1654 		chip_name = "renoir";
1655 		break;
1656 	case CHIP_NAVI10:
1657 		chip_name = "navi10";
1658 		break;
1659 	case CHIP_NAVI14:
1660 		chip_name = "navi14";
1661 		break;
1662 	case CHIP_NAVI12:
1663 		chip_name = "navi12";
1664 		break;
1665 	case CHIP_SIENNA_CICHLID:
1666 		chip_name = "sienna_cichlid";
1667 		break;
1668 	case CHIP_NAVY_FLOUNDER:
1669 		chip_name = "navy_flounder";
1670 		break;
1671 	}
1672 
1673 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1674 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1675 	if (err) {
1676 		dev_err(adev->dev,
1677 			"Failed to load gpu_info firmware \"%s\"\n",
1678 			fw_name);
1679 		goto out;
1680 	}
1681 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1682 	if (err) {
1683 		dev_err(adev->dev,
1684 			"Failed to validate gpu_info firmware \"%s\"\n",
1685 			fw_name);
1686 		goto out;
1687 	}
1688 
1689 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1690 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1691 
1692 	switch (hdr->version_major) {
1693 	case 1:
1694 	{
1695 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1696 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1697 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1698 
1699 		/*
1700 		 * Should be dropped when DAL no longer needs it.
1701 		 */
1702 		if (adev->asic_type == CHIP_NAVI12)
1703 			goto parse_soc_bounding_box;
1704 
1705 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1706 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1707 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1708 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1709 		adev->gfx.config.max_texture_channel_caches =
1710 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1711 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1712 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1713 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1714 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1715 		adev->gfx.config.double_offchip_lds_buf =
1716 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1717 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1718 		adev->gfx.cu_info.max_waves_per_simd =
1719 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1720 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1721 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1722 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1723 		if (hdr->version_minor >= 1) {
1724 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1725 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1726 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1727 			adev->gfx.config.num_sc_per_sh =
1728 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1729 			adev->gfx.config.num_packer_per_sc =
1730 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1731 		}
1732 
1733 parse_soc_bounding_box:
1734 		/*
1735 		 * soc bounding box info is not integrated in the discovery table,
1736 		 * so we always need to parse it from the gpu info firmware if needed.
1737 		 */
1738 		if (hdr->version_minor == 2) {
1739 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1740 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1741 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1742 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1743 		}
1744 		break;
1745 	}
1746 	default:
1747 		dev_err(adev->dev,
1748 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1749 		err = -EINVAL;
1750 		goto out;
1751 	}
1752 out:
1753 	return err;
1754 }
1755 
1756 /**
1757  * amdgpu_device_ip_early_init - run early init for hardware IPs
1758  *
1759  * @adev: amdgpu_device pointer
1760  *
1761  * Early initialization pass for hardware IPs.  The hardware IPs that make
1762  * up each asic are discovered and each IP's early_init callback is run.  This
1763  * is the first stage in initializing the asic.
1764  * Returns 0 on success, negative error code on failure.
1765  */
1766 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1767 {
1768 	int i, r;
1769 
1770 	amdgpu_device_enable_virtual_display(adev);
1771 
1772 	if (amdgpu_sriov_vf(adev)) {
1773 		r = amdgpu_virt_request_full_gpu(adev, true);
1774 		if (r)
1775 			return r;
1776 	}
1777 
1778 	switch (adev->asic_type) {
1779 #ifdef CONFIG_DRM_AMDGPU_SI
1780 	case CHIP_VERDE:
1781 	case CHIP_TAHITI:
1782 	case CHIP_PITCAIRN:
1783 	case CHIP_OLAND:
1784 	case CHIP_HAINAN:
1785 		adev->family = AMDGPU_FAMILY_SI;
1786 		r = si_set_ip_blocks(adev);
1787 		if (r)
1788 			return r;
1789 		break;
1790 #endif
1791 #ifdef CONFIG_DRM_AMDGPU_CIK
1792 	case CHIP_BONAIRE:
1793 	case CHIP_HAWAII:
1794 	case CHIP_KAVERI:
1795 	case CHIP_KABINI:
1796 	case CHIP_MULLINS:
1797 		if (adev->flags & AMD_IS_APU)
1798 			adev->family = AMDGPU_FAMILY_KV;
1799 		else
1800 			adev->family = AMDGPU_FAMILY_CI;
1801 
1802 		r = cik_set_ip_blocks(adev);
1803 		if (r)
1804 			return r;
1805 		break;
1806 #endif
1807 	case CHIP_TOPAZ:
1808 	case CHIP_TONGA:
1809 	case CHIP_FIJI:
1810 	case CHIP_POLARIS10:
1811 	case CHIP_POLARIS11:
1812 	case CHIP_POLARIS12:
1813 	case CHIP_VEGAM:
1814 	case CHIP_CARRIZO:
1815 	case CHIP_STONEY:
1816 		if (adev->flags & AMD_IS_APU)
1817 			adev->family = AMDGPU_FAMILY_CZ;
1818 		else
1819 			adev->family = AMDGPU_FAMILY_VI;
1820 
1821 		r = vi_set_ip_blocks(adev);
1822 		if (r)
1823 			return r;
1824 		break;
1825 	case CHIP_VEGA10:
1826 	case CHIP_VEGA12:
1827 	case CHIP_VEGA20:
1828 	case CHIP_RAVEN:
1829 	case CHIP_ARCTURUS:
1830 	case CHIP_RENOIR:
1831 		if (adev->flags & AMD_IS_APU)
1832 			adev->family = AMDGPU_FAMILY_RV;
1833 		else
1834 			adev->family = AMDGPU_FAMILY_AI;
1835 
1836 		r = soc15_set_ip_blocks(adev);
1837 		if (r)
1838 			return r;
1839 		break;
1840 	case  CHIP_NAVI10:
1841 	case  CHIP_NAVI14:
1842 	case  CHIP_NAVI12:
1843 	case  CHIP_SIENNA_CICHLID:
1844 	case  CHIP_NAVY_FLOUNDER:
1845 		adev->family = AMDGPU_FAMILY_NV;
1846 
1847 		r = nv_set_ip_blocks(adev);
1848 		if (r)
1849 			return r;
1850 		break;
1851 	default:
1852 		/* FIXME: not supported yet */
1853 		return -EINVAL;
1854 	}
1855 
1856 	amdgpu_amdkfd_device_probe(adev);
1857 
1858 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1859 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1860 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1861 
1862 	for (i = 0; i < adev->num_ip_blocks; i++) {
1863 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1864 			DRM_ERROR("disabled ip block: %d <%s>\n",
1865 				  i, adev->ip_blocks[i].version->funcs->name);
1866 			adev->ip_blocks[i].status.valid = false;
1867 		} else {
1868 			if (adev->ip_blocks[i].version->funcs->early_init) {
1869 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1870 				if (r == -ENOENT) {
1871 					adev->ip_blocks[i].status.valid = false;
1872 				} else if (r) {
1873 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1874 						  adev->ip_blocks[i].version->funcs->name, r);
1875 					return r;
1876 				} else {
1877 					adev->ip_blocks[i].status.valid = true;
1878 				}
1879 			} else {
1880 				adev->ip_blocks[i].status.valid = true;
1881 			}
1882 		}
		/* parse the gpu info firmware and get the vbios after the
		 * asic_funcs are set up
		 */
1884 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1885 			r = amdgpu_device_parse_gpu_info_fw(adev);
1886 			if (r)
1887 				return r;
1888 
1889 			/* Read BIOS */
1890 			if (!amdgpu_get_bios(adev))
1891 				return -EINVAL;
1892 
1893 			r = amdgpu_atombios_init(adev);
1894 			if (r) {
1895 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1896 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1897 				return r;
1898 			}
1899 		}
1900 	}
1901 
1902 	adev->cg_flags &= amdgpu_cg_mask;
1903 	adev->pg_flags &= amdgpu_pg_mask;
1904 
1905 	return 0;
1906 }
1907 
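/**
 * amdgpu_device_ip_hw_init_phase1 - hw init for COMMON, IH and PSP (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * First hardware init pass.  Only the COMMON, IH and (under SR-IOV) PSP
 * blocks are brought up here, so that firmware loading and the remaining
 * IP blocks can be initialized afterwards.
 * Returns 0 on success, negative error code on failure.
 */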
1908 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1909 {
1910 	int i, r;
1911 
1912 	for (i = 0; i < adev->num_ip_blocks; i++) {
1913 		if (!adev->ip_blocks[i].status.sw)
1914 			continue;
1915 		if (adev->ip_blocks[i].status.hw)
1916 			continue;
1917 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1918 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1919 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1920 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1921 			if (r) {
1922 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1923 					  adev->ip_blocks[i].version->funcs->name, r);
1924 				return r;
1925 			}
1926 			adev->ip_blocks[i].status.hw = true;
1927 		}
1928 	}
1929 
1930 	return 0;
1931 }
1932 
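/**
 * amdgpu_device_ip_hw_init_phase2 - hw init for the remaining IP blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Second hardware init pass.  Runs hw_init for every IP block whose
 * software state is initialized but whose hardware has not been brought
 * up yet, i.e. everything not handled in phase 1 or during fw loading.
 * Returns 0 on success, negative error code on failure.
 */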
1933 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1934 {
1935 	int i, r;
1936 
1937 	for (i = 0; i < adev->num_ip_blocks; i++) {
1938 		if (!adev->ip_blocks[i].status.sw)
1939 			continue;
1940 		if (adev->ip_blocks[i].status.hw)
1941 			continue;
1942 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1943 		if (r) {
1944 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1945 				  adev->ip_blocks[i].version->funcs->name, r);
1946 			return r;
1947 		}
1948 		adev->ip_blocks[i].status.hw = true;
1949 	}
1950 
1951 	return 0;
1952 }
1953 
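/**
 * amdgpu_device_fw_loading - kick off firmware loading
 *
 * @adev: amdgpu_device pointer
 *
 * On VEGA10 and newer, brings up (or resumes) the PSP block so that it can
 * load the firmwares for the other IP blocks, then loads the SMU firmware
 * unless running as an SR-IOV VF (TONGA VFs excepted).
 * Returns 0 on success, negative error code on failure.
 */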
1954 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1955 {
1956 	int r = 0;
1957 	int i;
1958 	uint32_t smu_version;
1959 
1960 	if (adev->asic_type >= CHIP_VEGA10) {
1961 		for (i = 0; i < adev->num_ip_blocks; i++) {
1962 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1963 				continue;
1964 
			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw)
1967 				break;
1968 
1969 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
1970 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1971 				if (r) {
1972 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1973 							  adev->ip_blocks[i].version->funcs->name, r);
1974 					return r;
1975 				}
1976 			} else {
1977 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1978 				if (r) {
1979 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1980 							  adev->ip_blocks[i].version->funcs->name, r);
1981 					return r;
1982 				}
1983 			}
1984 
1985 			adev->ip_blocks[i].status.hw = true;
1986 			break;
1987 		}
1988 	}
1989 
1990 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1991 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1992 
1993 	return r;
1994 }
1995 
1996 /**
1997  * amdgpu_device_ip_init - run init for hardware IPs
1998  *
1999  * @adev: amdgpu_device pointer
2000  *
2001  * Main initialization pass for hardware IPs.  The list of all the hardware
2002  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2003  * are run.  sw_init initializes the software state associated with each IP
2004  * and hw_init initializes the hardware associated with each IP.
2005  * Returns 0 on success, negative error code on failure.
2006  */
2007 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2008 {
2009 	int i, r;
2010 
2011 	r = amdgpu_ras_init(adev);
2012 	if (r)
2013 		return r;
2014 
2015 	for (i = 0; i < adev->num_ip_blocks; i++) {
2016 		if (!adev->ip_blocks[i].status.valid)
2017 			continue;
2018 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2019 		if (r) {
2020 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2021 				  adev->ip_blocks[i].version->funcs->name, r);
2022 			goto init_failed;
2023 		}
2024 		adev->ip_blocks[i].status.sw = true;
2025 
2026 		/* need to do gmc hw init early so we can allocate gpu mem */
2027 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2028 			r = amdgpu_device_vram_scratch_init(adev);
2029 			if (r) {
2030 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2031 				goto init_failed;
2032 			}
2033 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2034 			if (r) {
2035 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2036 				goto init_failed;
2037 			}
2038 			r = amdgpu_device_wb_init(adev);
2039 			if (r) {
2040 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2041 				goto init_failed;
2042 			}
2043 			adev->ip_blocks[i].status.hw = true;
2044 
2045 			/* right after GMC hw init, we create CSA */
2046 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2047 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2048 								AMDGPU_GEM_DOMAIN_VRAM,
2049 								AMDGPU_CSA_SIZE);
2050 				if (r) {
2051 					DRM_ERROR("allocate CSA failed %d\n", r);
2052 					goto init_failed;
2053 				}
2054 			}
2055 		}
2056 	}
2057 
2058 	if (amdgpu_sriov_vf(adev))
2059 		amdgpu_virt_init_data_exchange(adev);
2060 
2061 	r = amdgpu_ib_pool_init(adev);
2062 	if (r) {
2063 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2064 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2065 		goto init_failed;
2066 	}
2067 
	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
2069 	if (r)
2070 		goto init_failed;
2071 
2072 	r = amdgpu_device_ip_hw_init_phase1(adev);
2073 	if (r)
2074 		goto init_failed;
2075 
2076 	r = amdgpu_device_fw_loading(adev);
2077 	if (r)
2078 		goto init_failed;
2079 
2080 	r = amdgpu_device_ip_hw_init_phase2(adev);
2081 	if (r)
2082 		goto init_failed;
2083 
2084 	/*
	 * Retired pages will be loaded from eeprom and reserved here.
	 * This should be called after amdgpu_device_ip_hw_init_phase2, since
	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
	 * functional for I2C communication, which is only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the caller only cares about
	 * failures caused by a bad gpu state and stops the amdgpu init
	 * process accordingly.  For other failure cases it still releases
	 * all the resources and prints an error message, rather than
	 * returning a negative value to the upper level.
	 *
	 * Note: theoretically, this should be called before all vram
	 * allocations to protect retired pages from being abused.
2098 	 */
2099 	r = amdgpu_ras_recovery_init(adev);
2100 	if (r)
2101 		goto init_failed;
2102 
2103 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2104 		amdgpu_xgmi_add_device(adev);
2105 	amdgpu_amdkfd_device_init(adev);
2106 
2107 	amdgpu_fru_get_product_info(adev);
2108 
2109 init_failed:
2110 	if (amdgpu_sriov_vf(adev))
2111 		amdgpu_virt_release_full_gpu(adev, true);
2112 
2113 	return r;
2114 }
2115 
2116 /**
2117  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2118  *
2119  * @adev: amdgpu_device pointer
2120  *
2121  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2122  * this function before a GPU reset.  If the value is retained after a
 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2124  */
2125 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2126 {
2127 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2128 }
2129 
2130 /**
2131  * amdgpu_device_check_vram_lost - check if vram is valid
2132  *
2133  * @adev: amdgpu_device pointer
2134  *
2135  * Checks the reset magic value written to the gart pointer in VRAM.
2136  * The driver calls this after a GPU reset to see if the contents of
 * VRAM have been lost or not.
 * Returns true if vram is lost, false if not.
2139  */
2140 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2141 {
2142 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2143 			AMDGPU_RESET_MAGIC_NUM))
2144 		return true;
2145 
2146 	if (!amdgpu_in_reset(adev))
2147 		return false;
2148 
2149 	/*
2150 	 * For all ASICs with baco/mode1 reset, the VRAM is
2151 	 * always assumed to be lost.
2152 	 */
2153 	switch (amdgpu_asic_reset_method(adev)) {
2154 	case AMD_RESET_METHOD_BACO:
2155 	case AMD_RESET_METHOD_MODE1:
2156 		return true;
2157 	default:
2158 		return false;
2159 	}
2160 }
2161 
2162 /**
2163  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2164  *
2165  * @adev: amdgpu_device pointer
2166  * @state: clockgating state (gate or ungate)
2167  *
2168  * The list of all the hardware IPs that make up the asic is walked and the
2169  * set_clockgating_state callbacks are run.
 * During the late initialization pass, clockgating is enabled for the
 * hardware IPs; during fini or suspend, it is disabled again.
2172  * Returns 0 on success, negative error code on failure.
2173  */
2174 
2175 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2176 						enum amd_clockgating_state state)
2177 {
2178 	int i, j, r;
2179 
2180 	if (amdgpu_emu_mode == 1)
2181 		return 0;
2182 
2183 	for (j = 0; j < adev->num_ip_blocks; j++) {
2184 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2185 		if (!adev->ip_blocks[i].status.late_initialized)
2186 			continue;
		/* skip CG for UVD/VCE/VCN/JPEG, it's handled specially */
2188 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2189 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2190 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2191 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2192 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2193 			/* enable clockgating to save power */
2194 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2195 										     state);
2196 			if (r) {
2197 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2198 					  adev->ip_blocks[i].version->funcs->name, r);
2199 				return r;
2200 			}
2201 		}
2202 	}
2203 
2204 	return 0;
2205 }
2206 
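/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_powergating_state callbacks are run, skipping the blocks (UVD, VCE,
 * VCN, JPEG) that handle powergating on their own.
 * Returns 0 on success, negative error code on failure.
 */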
2207 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
2208 {
2209 	int i, j, r;
2210 
2211 	if (amdgpu_emu_mode == 1)
2212 		return 0;
2213 
2214 	for (j = 0; j < adev->num_ip_blocks; j++) {
2215 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2216 		if (!adev->ip_blocks[i].status.late_initialized)
2217 			continue;
		/* skip PG for UVD/VCE/VCN/JPEG, it's handled specially */
2219 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2220 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2221 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2222 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2223 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2224 			/* enable powergating to save power */
2225 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2226 											state);
2227 			if (r) {
2228 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2229 					  adev->ip_blocks[i].version->funcs->name, r);
2230 				return r;
2231 			}
2232 		}
2233 	}
2234 	return 0;
2235 }
2236 
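/**
 * amdgpu_device_enable_mgpu_fan_boost - enable fan boost on multi-dGPU systems
 *
 * Walks all registered dGPU instances and enables the multi-GPU fan boost
 * feature on each of them.  Only takes effect when two or more dGPUs are
 * present in the system.
 * Returns 0 on success, negative error code on failure.
 */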
2237 static int amdgpu_device_enable_mgpu_fan_boost(void)
2238 {
2239 	struct amdgpu_gpu_instance *gpu_ins;
2240 	struct amdgpu_device *adev;
2241 	int i, ret = 0;
2242 
2243 	mutex_lock(&mgpu_info.mutex);
2244 
2245 	/*
2246 	 * MGPU fan boost feature should be enabled
2247 	 * only when there are two or more dGPUs in
2248 	 * the system
2249 	 */
2250 	if (mgpu_info.num_dgpu < 2)
2251 		goto out;
2252 
2253 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2254 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2255 		adev = gpu_ins->adev;
2256 		if (!(adev->flags & AMD_IS_APU) &&
2257 		    !gpu_ins->mgpu_fan_enabled) {
2258 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2259 			if (ret)
2260 				break;
2261 
2262 			gpu_ins->mgpu_fan_enabled = 1;
2263 		}
2264 	}
2265 
2266 out:
2267 	mutex_unlock(&mgpu_info.mutex);
2268 
2269 	return ret;
2270 }
2271 
2272 /**
2273  * amdgpu_device_ip_late_init - run late init for hardware IPs
2274  *
2275  * @adev: amdgpu_device pointer
2276  *
2277  * Late initialization pass for hardware IPs.  The list of all the hardware
2278  * IPs that make up the asic is walked and the late_init callbacks are run.
2279  * late_init covers any special initialization that an IP requires
 * after all of the other IPs have been initialized or something that needs to happen
2281  * late in the init process.
2282  * Returns 0 on success, negative error code on failure.
2283  */
2284 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2285 {
2286 	struct amdgpu_gpu_instance *gpu_instance;
2287 	int i = 0, r;
2288 
2289 	for (i = 0; i < adev->num_ip_blocks; i++) {
2290 		if (!adev->ip_blocks[i].status.hw)
2291 			continue;
2292 		if (adev->ip_blocks[i].version->funcs->late_init) {
2293 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2294 			if (r) {
2295 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2296 					  adev->ip_blocks[i].version->funcs->name, r);
2297 				return r;
2298 			}
2299 		}
2300 		adev->ip_blocks[i].status.late_initialized = true;
2301 	}
2302 
2303 	amdgpu_ras_set_error_query_ready(adev, true);
2304 
2305 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2306 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2307 
2308 	amdgpu_device_fill_reset_magic(adev);
2309 
2310 	r = amdgpu_device_enable_mgpu_fan_boost();
2311 	if (r)
2312 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2313 
2314 
2315 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2316 		mutex_lock(&mgpu_info.mutex);
2317 
2318 		/*
		 * Reset the device p-state to low, as it was booted with high.
		 *
		 * This should be performed only after all devices from the same
		 * hive have been initialized.
		 *
		 * However, the number of devices in the hive is not known in
		 * advance, since it is counted up one by one as the devices
		 * initialize.
		 *
		 * So we wait until all XGMI interlinked devices have been
		 * initialized.  This may introduce some delay since those
		 * devices may come from different hives, but that should be OK.
2330 		 */
2331 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2332 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2333 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2334 				if (gpu_instance->adev->flags & AMD_IS_APU)
2335 					continue;
2336 
2337 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2338 						AMDGPU_XGMI_PSTATE_MIN);
2339 				if (r) {
2340 					DRM_ERROR("pstate setting failed (%d).\n", r);
2341 					break;
2342 				}
2343 			}
2344 		}
2345 
2346 		mutex_unlock(&mgpu_info.mutex);
2347 	}
2348 
2349 	return 0;
2350 }
2351 
2352 /**
2353  * amdgpu_device_ip_fini - run fini for hardware IPs
2354  *
2355  * @adev: amdgpu_device pointer
2356  *
2357  * Main teardown pass for hardware IPs.  The list of all the hardware
2358  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2359  * are run.  hw_fini tears down the hardware associated with each IP
2360  * and sw_fini tears down any software state associated with each IP.
2361  * Returns 0 on success, negative error code on failure.
2362  */
2363 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2364 {
2365 	int i, r;
2366 
2367 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2368 		amdgpu_virt_release_ras_err_handler_data(adev);
2369 
2370 	amdgpu_ras_pre_fini(adev);
2371 
2372 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2373 		amdgpu_xgmi_remove_device(adev);
2374 
2375 	amdgpu_amdkfd_device_fini(adev);
2376 
2377 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2378 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2379 
2380 	/* need to disable SMC first */
2381 	for (i = 0; i < adev->num_ip_blocks; i++) {
2382 		if (!adev->ip_blocks[i].status.hw)
2383 			continue;
2384 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2385 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2386 			/* XXX handle errors */
2387 			if (r) {
2388 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2389 					  adev->ip_blocks[i].version->funcs->name, r);
2390 			}
2391 			adev->ip_blocks[i].status.hw = false;
2392 			break;
2393 		}
2394 	}
2395 
2396 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2397 		if (!adev->ip_blocks[i].status.hw)
2398 			continue;
2399 
2400 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2401 		/* XXX handle errors */
2402 		if (r) {
2403 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2404 				  adev->ip_blocks[i].version->funcs->name, r);
2405 		}
2406 
2407 		adev->ip_blocks[i].status.hw = false;
2408 	}
2409 
2410 
2411 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2412 		if (!adev->ip_blocks[i].status.sw)
2413 			continue;
2414 
2415 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2416 			amdgpu_ucode_free_bo(adev);
2417 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2418 			amdgpu_device_wb_fini(adev);
2419 			amdgpu_device_vram_scratch_fini(adev);
2420 			amdgpu_ib_pool_fini(adev);
2421 		}
2422 
2423 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2424 		/* XXX handle errors */
2425 		if (r) {
2426 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2427 				  adev->ip_blocks[i].version->funcs->name, r);
2428 		}
2429 		adev->ip_blocks[i].status.sw = false;
2430 		adev->ip_blocks[i].status.valid = false;
2431 	}
2432 
2433 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2434 		if (!adev->ip_blocks[i].status.late_initialized)
2435 			continue;
2436 		if (adev->ip_blocks[i].version->funcs->late_fini)
2437 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2438 		adev->ip_blocks[i].status.late_initialized = false;
2439 	}
2440 
2441 	amdgpu_ras_fini(adev);
2442 
2443 	if (amdgpu_sriov_vf(adev))
2444 		if (amdgpu_virt_release_full_gpu(adev, false))
2445 			DRM_ERROR("failed to release exclusive mode on fini\n");
2446 
2447 	return 0;
2448 }
2449 
2450 /**
2451  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2452  *
2453  * @work: work_struct.
2454  */
2455 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2456 {
2457 	struct amdgpu_device *adev =
2458 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2459 	int r;
2460 
2461 	r = amdgpu_ib_ring_tests(adev);
2462 	if (r)
2463 		DRM_ERROR("ib ring test failed (%d).\n", r);
2464 }
2465 
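/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler to enable gfxoff
 *
 * @work: work_struct.
 *
 * If nothing has requested the GFX block to stay powered up
 * (gfx_off_req_count == 0) and gfxoff is not already enabled, asks the SMU
 * to powergate the GFX block.
 */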
2466 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2467 {
2468 	struct amdgpu_device *adev =
2469 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2470 
2471 	mutex_lock(&adev->gfx.gfx_off_mutex);
2472 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2473 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2474 			adev->gfx.gfx_off_state = true;
2475 	}
2476 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2477 }
2478 
2479 /**
2480  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2481  *
2482  * @adev: amdgpu_device pointer
2483  *
 * First suspend pass for hardware IPs.  Clockgating and powergating are
 * disabled and the suspend callbacks are run for the display (DCE) blocks
 * only; the remaining blocks are handled in phase 2.  suspend puts the
 * hardware and software state in each IP into a state suitable for suspend.
2488  * Returns 0 on success, negative error code on failure.
2489  */
2490 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2491 {
2492 	int i, r;
2493 
2494 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2495 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2496 
2497 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2498 		if (!adev->ip_blocks[i].status.valid)
2499 			continue;
2500 
2501 		/* displays are handled separately */
2502 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2503 			continue;
2504 
2505 		/* XXX handle errors */
2506 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2507 		/* XXX handle errors */
2508 		if (r) {
2509 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2510 				  adev->ip_blocks[i].version->funcs->name, r);
2511 			return r;
2512 		}
2513 
2514 		adev->ip_blocks[i].status.hw = false;
2515 	}
2516 
2517 	return 0;
2518 }
2519 
2520 /**
2521  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2522  *
2523  * @adev: amdgpu_device pointer
2524  *
 * Second suspend pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the suspend callbacks are run
 * for all blocks except the display (DCE) blocks, which were handled in
 * phase 1.  suspend puts the hardware and software state in each IP into
 * a state suitable for suspend.
2529  * Returns 0 on success, negative error code on failure.
2530  */
2531 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2532 {
2533 	int i, r;
2534 
2535 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2536 		if (!adev->ip_blocks[i].status.valid)
2537 			continue;
2538 		/* displays are handled in phase1 */
2539 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2540 			continue;
2541 		/* PSP lost connection when err_event_athub occurs */
2542 		if (amdgpu_ras_intr_triggered() &&
2543 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2544 			adev->ip_blocks[i].status.hw = false;
2545 			continue;
2546 		}
2547 		/* XXX handle errors */
2548 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2549 		/* XXX handle errors */
2550 		if (r) {
2551 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2552 				  adev->ip_blocks[i].version->funcs->name, r);
2553 		}
2554 		adev->ip_blocks[i].status.hw = false;
2555 		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
2557 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2558 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2559 				if (r) {
2560 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2561 							adev->mp1_state, r);
2562 					return r;
2563 				}
2564 			}
2565 		}
2566 		adev->ip_blocks[i].status.hw = false;
2567 	}
2568 
2569 	return 0;
2570 }
2571 
2572 /**
2573  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2574  *
2575  * @adev: amdgpu_device pointer
2576  *
2577  * Main suspend function for hardware IPs.  The list of all the hardware
2578  * IPs that make up the asic is walked, clockgating is disabled and the
2579  * suspend callbacks are run.  suspend puts the hardware and software state
2580  * in each IP into a state suitable for suspend.
2581  * Returns 0 on success, negative error code on failure.
2582  */
2583 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2584 {
2585 	int r;
2586 
2587 	if (amdgpu_sriov_vf(adev))
2588 		amdgpu_virt_request_full_gpu(adev, false);
2589 
2590 	r = amdgpu_device_ip_suspend_phase1(adev);
2591 	if (r)
2592 		return r;
2593 	r = amdgpu_device_ip_suspend_phase2(adev);
2594 
2595 	if (amdgpu_sriov_vf(adev))
2596 		amdgpu_virt_release_full_gpu(adev, false);
2597 
2598 	return r;
2599 }
2600 
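/**
 * amdgpu_device_ip_reinit_early_sriov - re-init early IP blocks for SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Used when recovering an SR-IOV virtual function.  Re-runs hw_init for
 * the GMC, COMMON, PSP and IH blocks in that fixed order.
 * Returns 0 on success, negative error code on failure.
 */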
2601 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2602 {
2603 	int i, r;
2604 
2605 	static enum amd_ip_block_type ip_order[] = {
2606 		AMD_IP_BLOCK_TYPE_GMC,
2607 		AMD_IP_BLOCK_TYPE_COMMON,
2608 		AMD_IP_BLOCK_TYPE_PSP,
2609 		AMD_IP_BLOCK_TYPE_IH,
2610 	};
2611 
2612 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2613 		int j;
2614 		struct amdgpu_ip_block *block;
2615 
2616 		block = &adev->ip_blocks[i];
2617 		block->status.hw = false;
2618 
2619 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2620 
2621 			if (block->version->type != ip_order[j] ||
2622 				!block->status.valid)
2623 				continue;
2624 
2625 			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name,
				 r ? "failed" : "succeeded");
2627 			if (r)
2628 				return r;
2629 			block->status.hw = true;
2630 		}
2631 	}
2632 
2633 	return 0;
2634 }
2635 
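/**
 * amdgpu_device_ip_reinit_late_sriov - re-init late IP blocks for SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Used when recovering an SR-IOV virtual function.  Re-initializes the
 * SMC, DCE, GFX, SDMA, UVD, VCE and VCN blocks in that fixed order,
 * resuming the SMC and running hw_init for everything else.
 * Returns 0 on success, negative error code on failure.
 */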
2636 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2637 {
2638 	int i, r;
2639 
2640 	static enum amd_ip_block_type ip_order[] = {
2641 		AMD_IP_BLOCK_TYPE_SMC,
2642 		AMD_IP_BLOCK_TYPE_DCE,
2643 		AMD_IP_BLOCK_TYPE_GFX,
2644 		AMD_IP_BLOCK_TYPE_SDMA,
2645 		AMD_IP_BLOCK_TYPE_UVD,
2646 		AMD_IP_BLOCK_TYPE_VCE,
2647 		AMD_IP_BLOCK_TYPE_VCN
2648 	};
2649 
2650 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2651 		int j;
2652 		struct amdgpu_ip_block *block;
2653 
2654 		for (j = 0; j < adev->num_ip_blocks; j++) {
2655 			block = &adev->ip_blocks[j];
2656 
2657 			if (block->version->type != ip_order[i] ||
2658 				!block->status.valid ||
2659 				block->status.hw)
2660 				continue;
2661 
2662 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2663 				r = block->version->funcs->resume(adev);
2664 			else
2665 				r = block->version->funcs->hw_init(adev);
2666 
			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name,
				 r ? "failed" : "succeeded");
2668 			if (r)
2669 				return r;
2670 			block->status.hw = true;
2671 		}
2672 	}
2673 
2674 	return 0;
2675 }
2676 
2677 /**
2678  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2679  *
2680  * @adev: amdgpu_device pointer
2681  *
2682  * First resume function for hardware IPs.  The list of all the hardware
2683  * IPs that make up the asic is walked and the resume callbacks are run for
2684  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2685  * after a suspend and updates the software state as necessary.  This
2686  * function is also used for restoring the GPU after a GPU reset.
2687  * Returns 0 on success, negative error code on failure.
2688  */
2689 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2690 {
2691 	int i, r;
2692 
2693 	for (i = 0; i < adev->num_ip_blocks; i++) {
2694 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2695 			continue;
2696 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2697 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2698 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2699 
2700 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2701 			if (r) {
2702 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2703 					  adev->ip_blocks[i].version->funcs->name, r);
2704 				return r;
2705 			}
2706 			adev->ip_blocks[i].status.hw = true;
2707 		}
2708 	}
2709 
2710 	return 0;
2711 }
2712 
2713 /**
2714  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2715  *
2716  * @adev: amdgpu_device pointer
2717  *
 * Second resume function for hardware IPs.  The list of all the hardware
2719  * IPs that make up the asic is walked and the resume callbacks are run for
2720  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2721  * functional state after a suspend and updates the software state as
2722  * necessary.  This function is also used for restoring the GPU after a GPU
2723  * reset.
2724  * Returns 0 on success, negative error code on failure.
2725  */
2726 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2727 {
2728 	int i, r;
2729 
2730 	for (i = 0; i < adev->num_ip_blocks; i++) {
2731 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2732 			continue;
2733 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2734 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2735 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2736 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2737 			continue;
2738 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2739 		if (r) {
2740 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2741 				  adev->ip_blocks[i].version->funcs->name, r);
2742 			return r;
2743 		}
2744 		adev->ip_blocks[i].status.hw = true;
2745 	}
2746 
2747 	return 0;
2748 }
2749 
2750 /**
2751  * amdgpu_device_ip_resume - run resume for hardware IPs
2752  *
2753  * @adev: amdgpu_device pointer
2754  *
2755  * Main resume function for hardware IPs.  The hardware IPs
 * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them.  In this case (S3/S4) they are
2759  * run sequentially.
2760  * Returns 0 on success, negative error code on failure.
2761  */
2762 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2763 {
2764 	int r;
2765 
2766 	r = amdgpu_device_ip_resume_phase1(adev);
2767 	if (r)
2768 		return r;
2769 
2770 	r = amdgpu_device_fw_loading(adev);
2771 	if (r)
2772 		return r;
2773 
2774 	r = amdgpu_device_ip_resume_phase2(adev);
2775 
2776 	return r;
2777 }
2778 
2779 /**
2780  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2781  *
2782  * @adev: amdgpu_device pointer
2783  *
2784  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2785  */
2786 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2787 {
2788 	if (amdgpu_sriov_vf(adev)) {
2789 		if (adev->is_atom_fw) {
2790 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2791 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2792 		} else {
2793 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2794 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2795 		}
2796 
2797 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2798 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2799 	}
2800 }
2801 
2802 /**
2803  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2804  *
2805  * @asic_type: AMD asic type
2806  *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * Returns true if DC has support, false if not.
2809  */
2810 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2811 {
2812 	switch (asic_type) {
2813 #if defined(CONFIG_DRM_AMD_DC)
2814 #if defined(CONFIG_DRM_AMD_DC_SI)
2815 	case CHIP_TAHITI:
2816 	case CHIP_PITCAIRN:
2817 	case CHIP_VERDE:
2818 	case CHIP_OLAND:
2819 #endif
2820 	case CHIP_BONAIRE:
2821 	case CHIP_KAVERI:
2822 	case CHIP_KABINI:
2823 	case CHIP_MULLINS:
2824 		/*
2825 		 * We have systems in the wild with these ASICs that require
2826 		 * LVDS and VGA support which is not supported with DC.
2827 		 *
2828 		 * Fallback to the non-DC driver here by default so as not to
2829 		 * cause regressions.
2830 		 */
2831 		return amdgpu_dc > 0;
2832 	case CHIP_HAWAII:
2833 	case CHIP_CARRIZO:
2834 	case CHIP_STONEY:
2835 	case CHIP_POLARIS10:
2836 	case CHIP_POLARIS11:
2837 	case CHIP_POLARIS12:
2838 	case CHIP_VEGAM:
2839 	case CHIP_TONGA:
2840 	case CHIP_FIJI:
2841 	case CHIP_VEGA10:
2842 	case CHIP_VEGA12:
2843 	case CHIP_VEGA20:
2844 #if defined(CONFIG_DRM_AMD_DC_DCN)
2845 	case CHIP_RAVEN:
2846 	case CHIP_NAVI10:
2847 	case CHIP_NAVI14:
2848 	case CHIP_NAVI12:
2849 	case CHIP_RENOIR:
2850 #endif
2851 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2852 	case CHIP_SIENNA_CICHLID:
2853 	case CHIP_NAVY_FLOUNDER:
2854 #endif
2855 		return amdgpu_dc != 0;
2856 #endif
2857 	default:
2858 		if (amdgpu_dc > 0)
2859 			DRM_INFO("Display Core has been requested via kernel parameter "
2860 					 "but isn't supported by ASIC, ignoring\n");
2861 		return false;
2862 	}
2863 }
2864 
2865 /**
2866  * amdgpu_device_has_dc_support - check if dc is supported
2867  *
 * @adev: amdgpu_device pointer
2869  *
2870  * Returns true for supported, false for not supported
2871  */
2872 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2873 {
2874 	if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2875 		return false;
2876 
2877 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2878 }
2879 
2880 
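/**
 * amdgpu_device_xgmi_reset_func - work handler for resetting a device in an XGMI hive
 *
 * @__work: work_struct.
 *
 * For BACO-capable ASICs, BACO entry/exit is synchronized across the hive
 * with a task barrier so that all nodes enter and leave BACO together;
 * otherwise a full barrier is used before each ASIC is reset individually.
 */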
2881 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2882 {
2883 	struct amdgpu_device *adev =
2884 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2885 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2886 
2887 	/* It's a bug to not have a hive within this function */
2888 	if (WARN_ON(!hive))
2889 		return;
2890 
2891 	/*
2892 	 * Use task barrier to synchronize all xgmi reset works across the
2893 	 * hive. task_barrier_enter and task_barrier_exit will block
2894 	 * until all the threads running the xgmi reset works reach
2895 	 * those points. task_barrier_full will do both blocks.
2896 	 */
2897 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2898 
2899 		task_barrier_enter(&hive->tb);
2900 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
2901 
2902 		if (adev->asic_reset_res)
2903 			goto fail;
2904 
2905 		task_barrier_exit(&hive->tb);
2906 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
2907 
2908 		if (adev->asic_reset_res)
2909 			goto fail;
2910 
2911 		if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2912 			adev->mmhub.funcs->reset_ras_error_count(adev);
2913 	} else {
2914 
2915 		task_barrier_full(&hive->tb);
2916 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
2917 	}
2918 
2919 fail:
2920 	if (adev->asic_reset_res)
2921 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2922 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
2923 	amdgpu_put_xgmi_hive(hive);
2924 }
2925 
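/**
 * amdgpu_device_get_job_timeout_settings - parse job timeout overrides
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the comma separated amdgpu.lockup_timeout module parameter and
 * fills in the per-engine job timeouts in the order gfx, compute, sdma,
 * video, e.g. "lockup_timeout=10000,60000,10000,10000".  A value of 0
 * keeps the default and a negative value disables the timeout.
 * Returns 0 on success, negative error code on failure.
 */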
2926 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2927 {
2928 	char *input = amdgpu_lockup_timeout;
2929 	char *timeout_setting = NULL;
2930 	int index = 0;
2931 	long timeout;
2932 	int ret = 0;
2933 
2934 	/*
	 * By default the timeout for non-compute jobs is 10000 ms
	 * and there is no timeout enforced on compute jobs.
	 * In SR-IOV or passthrough mode, the default timeout for
	 * compute jobs is 60000 ms.
2939 	 */
2940 	adev->gfx_timeout = msecs_to_jiffies(10000);
2941 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2942 	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
		adev->compute_timeout = msecs_to_jiffies(60000);
2944 	else
2945 		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2946 
2947 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2948 		while ((timeout_setting = strsep(&input, ",")) &&
2949 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2950 			ret = kstrtol(timeout_setting, 0, &timeout);
2951 			if (ret)
2952 				return ret;
2953 
2954 			if (timeout == 0) {
2955 				index++;
2956 				continue;
2957 			} else if (timeout < 0) {
2958 				timeout = MAX_SCHEDULE_TIMEOUT;
2959 			} else {
2960 				timeout = msecs_to_jiffies(timeout);
2961 			}
2962 
2963 			switch (index++) {
2964 			case 0:
2965 				adev->gfx_timeout = timeout;
2966 				break;
2967 			case 1:
2968 				adev->compute_timeout = timeout;
2969 				break;
2970 			case 2:
2971 				adev->sdma_timeout = timeout;
2972 				break;
2973 			case 3:
2974 				adev->video_timeout = timeout;
2975 				break;
2976 			default:
2977 				break;
2978 			}
2979 		}
2980 		/*
		 * If only one value was specified, it should apply
		 * to all non-compute jobs.
2983 		 */
2984 		if (index == 1) {
2985 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2986 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2987 				adev->compute_timeout = adev->gfx_timeout;
2988 		}
2989 	}
2990 
2991 	return ret;
2992 }
2993 
2994 static const struct attribute *amdgpu_dev_attributes[] = {
2995 	&dev_attr_product_name.attr,
2996 	&dev_attr_product_number.attr,
2997 	&dev_attr_serial_number.attr,
2998 	&dev_attr_pcie_replay_count.attr,
2999 	NULL
3000 };
3001 
3002 /**
3003  * amdgpu_device_init - initialize the driver
3004  *
3005  * @adev: amdgpu_device pointer
3006  * @flags: driver flags
3007  *
3008  * Initializes the driver info and hw (all asics).
3009  * Returns 0 for success or an error on failure.
3010  * Called at driver startup.
3011  */
3012 int amdgpu_device_init(struct amdgpu_device *adev,
3013 		       uint32_t flags)
3014 {
3015 	struct drm_device *ddev = adev_to_drm(adev);
3016 	struct pci_dev *pdev = adev->pdev;
3017 	int r, i;
3018 	bool boco = false;
3019 	u32 max_MBps;
3020 
3021 	adev->shutdown = false;
3022 	adev->flags = flags;
3023 
3024 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3025 		adev->asic_type = amdgpu_force_asic_type;
3026 	else
3027 		adev->asic_type = flags & AMD_ASIC_MASK;
3028 
3029 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3030 	if (amdgpu_emu_mode == 1)
3031 		adev->usec_timeout *= 10;
3032 	adev->gmc.gart_size = 512 * 1024 * 1024;
3033 	adev->accel_working = false;
3034 	adev->num_rings = 0;
3035 	adev->mman.buffer_funcs = NULL;
3036 	adev->mman.buffer_funcs_ring = NULL;
3037 	adev->vm_manager.vm_pte_funcs = NULL;
3038 	adev->vm_manager.vm_pte_num_scheds = 0;
3039 	adev->gmc.gmc_funcs = NULL;
3040 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3041 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3042 
3043 	adev->smc_rreg = &amdgpu_invalid_rreg;
3044 	adev->smc_wreg = &amdgpu_invalid_wreg;
3045 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3046 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3047 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3048 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3049 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3050 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3051 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3052 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3053 	adev->didt_rreg = &amdgpu_invalid_rreg;
3054 	adev->didt_wreg = &amdgpu_invalid_wreg;
3055 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3056 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3057 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3058 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3059 
3060 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3061 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3062 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3063 
	/* mutex initialization is all done here so we
	 * can recall functions without having locking issues */
3066 	atomic_set(&adev->irq.ih.lock, 0);
3067 	mutex_init(&adev->firmware.mutex);
3068 	mutex_init(&adev->pm.mutex);
3069 	mutex_init(&adev->gfx.gpu_clock_mutex);
3070 	mutex_init(&adev->srbm_mutex);
3071 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3072 	mutex_init(&adev->gfx.gfx_off_mutex);
3073 	mutex_init(&adev->grbm_idx_mutex);
3074 	mutex_init(&adev->mn_lock);
3075 	mutex_init(&adev->virt.vf_errors.lock);
3076 	hash_init(adev->mn_hash);
3077 	atomic_set(&adev->in_gpu_reset, 0);
3078 	init_rwsem(&adev->reset_sem);
3079 	mutex_init(&adev->psp.mutex);
3080 	mutex_init(&adev->notifier_lock);
3081 
3082 	r = amdgpu_device_check_arguments(adev);
3083 	if (r)
3084 		return r;
3085 
3086 	spin_lock_init(&adev->mmio_idx_lock);
3087 	spin_lock_init(&adev->smc_idx_lock);
3088 	spin_lock_init(&adev->pcie_idx_lock);
3089 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3090 	spin_lock_init(&adev->didt_idx_lock);
3091 	spin_lock_init(&adev->gc_cac_idx_lock);
3092 	spin_lock_init(&adev->se_cac_idx_lock);
3093 	spin_lock_init(&adev->audio_endpt_idx_lock);
3094 	spin_lock_init(&adev->mm_stats.lock);
3095 
3096 	INIT_LIST_HEAD(&adev->shadow_list);
3097 	mutex_init(&adev->shadow_list_lock);
3098 
3099 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3100 			  amdgpu_device_delayed_init_work_handler);
3101 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3102 			  amdgpu_device_delay_enable_gfx_off);
3103 
3104 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3105 
3106 	adev->gfx.gfx_off_req_count = 1;
3107 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3108 
3109 	atomic_set(&adev->throttling_logging_enabled, 1);
3110 	/*
3111 	 * If throttling continues, logging will be performed every minute
3112 	 * to avoid log flooding. "-1" is subtracted since the thermal
3113 	 * throttling interrupt comes every second. Thus, the total logging
	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3115 	 * for throttling interrupt) = 60 seconds.
3116 	 */
3117 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3118 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3119 
3120 	/* Registers mapping */
3121 	/* TODO: block userspace mapping of io register */
3122 	if (adev->asic_type >= CHIP_BONAIRE) {
3123 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3124 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3125 	} else {
3126 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3127 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3128 	}
3129 
3130 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3131 	if (adev->rmmio == NULL) {
3132 		return -ENOMEM;
3133 	}
3134 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3135 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3136 
3137 	/* io port mapping */
3138 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3139 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3140 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3141 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3142 			break;
3143 		}
3144 	}
3145 	if (adev->rio_mem == NULL)
3146 		DRM_INFO("PCI I/O BAR is not found.\n");
3147 
3148 	/* enable PCIE atomic ops */
3149 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3150 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3151 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3152 	if (r) {
3153 		adev->have_atomics_support = false;
		DRM_INFO("PCIE atomic ops are not supported\n");
3155 	} else {
3156 		adev->have_atomics_support = true;
3157 	}
3158 
3159 	amdgpu_device_get_pcie_info(adev);
3160 
3161 	if (amdgpu_mcbp)
3162 		DRM_INFO("MCBP is enabled\n");
3163 
3164 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3165 		adev->enable_mes = true;
3166 
3167 	/* detect hw virtualization here */
3168 	amdgpu_detect_virtualization(adev);
3169 
3170 	r = amdgpu_device_get_job_timeout_settings(adev);
3171 	if (r) {
3172 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3173 		return r;
3174 	}
3175 
3176 	/* early init functions */
3177 	r = amdgpu_device_ip_early_init(adev);
3178 	if (r)
3179 		return r;
3180 
3181 	/* doorbell bar mapping and doorbell index init*/
3182 	amdgpu_device_doorbell_init(adev);
3183 
3184 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3185 	/* this will fail for cards that aren't VGA class devices, just
3186 	 * ignore it */
3187 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3188 
3189 	if (amdgpu_device_supports_boco(ddev))
3190 		boco = true;
3191 	if (amdgpu_has_atpx() &&
3192 	    (amdgpu_is_atpx_hybrid() ||
3193 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3194 	    !pci_is_thunderbolt_attached(adev->pdev))
3195 		vga_switcheroo_register_client(adev->pdev,
3196 					       &amdgpu_switcheroo_ops, boco);
3197 	if (boco)
3198 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3199 
3200 	if (amdgpu_emu_mode == 1) {
3201 		/* post the asic on emulation mode */
3202 		emu_soc_asic_init(adev);
3203 		goto fence_driver_init;
3204 	}
3205 
3206 	/* detect if we are with an SRIOV vbios */
3207 	amdgpu_device_detect_sriov_bios(adev);
3208 
3209 	/* check if we need to reset the asic
3210 	 *  E.g., driver was not cleanly unloaded previously, etc.
3211 	 */
3212 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3213 		r = amdgpu_asic_reset(adev);
3214 		if (r) {
3215 			dev_err(adev->dev, "asic reset on init failed\n");
3216 			goto failed;
3217 		}
3218 	}
3219 
3220 	/* Post card if necessary */
3221 	if (amdgpu_device_need_post(adev)) {
3222 		if (!adev->bios) {
3223 			dev_err(adev->dev, "no vBIOS found\n");
3224 			r = -EINVAL;
3225 			goto failed;
3226 		}
3227 		DRM_INFO("GPU posting now...\n");
3228 		r = amdgpu_device_asic_init(adev);
3229 		if (r) {
3230 			dev_err(adev->dev, "gpu post error!\n");
3231 			goto failed;
3232 		}
3233 	}
3234 
3235 	if (adev->is_atom_fw) {
3236 		/* Initialize clocks */
3237 		r = amdgpu_atomfirmware_get_clock_info(adev);
3238 		if (r) {
3239 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3240 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3241 			goto failed;
3242 		}
3243 	} else {
3244 		/* Initialize clocks */
3245 		r = amdgpu_atombios_get_clock_info(adev);
3246 		if (r) {
3247 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3248 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3249 			goto failed;
3250 		}
3251 		/* init i2c buses */
3252 		if (!amdgpu_device_has_dc_support(adev))
3253 			amdgpu_atombios_i2c_init(adev);
3254 	}
3255 
3256 fence_driver_init:
3257 	/* Fence driver */
3258 	r = amdgpu_fence_driver_init(adev);
3259 	if (r) {
3260 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3261 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3262 		goto failed;
3263 	}
3264 
3265 	/* init the mode config */
3266 	drm_mode_config_init(adev_to_drm(adev));
3267 
3268 	r = amdgpu_device_ip_init(adev);
3269 	if (r) {
3270 		/* failed in exclusive mode due to timeout */
3271 		if (amdgpu_sriov_vf(adev) &&
3272 		    !amdgpu_sriov_runtime(adev) &&
3273 		    amdgpu_virt_mmio_blocked(adev) &&
3274 		    !amdgpu_virt_wait_reset(adev)) {
3275 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3276 			/* Don't send request since VF is inactive. */
3277 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3278 			adev->virt.ops = NULL;
3279 			r = -EAGAIN;
3280 			goto failed;
3281 		}
3282 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3283 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3284 		goto failed;
3285 	}
3286 
3287 	dev_info(adev->dev,
3288 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3289 			adev->gfx.config.max_shader_engines,
3290 			adev->gfx.config.max_sh_per_se,
3291 			adev->gfx.config.max_cu_per_sh,
3292 			adev->gfx.cu_info.number);
3293 
3294 	adev->accel_working = true;
3295 
3296 	amdgpu_vm_check_compute_bug(adev);
3297 
3298 	/* Initialize the buffer migration limit. */
3299 	if (amdgpu_moverate >= 0)
3300 		max_MBps = amdgpu_moverate;
3301 	else
3302 		max_MBps = 8; /* Allow 8 MB/s. */
3303 	/* Get a log2 for easy divisions. */
3304 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3305 
3306 	amdgpu_fbdev_init(adev);
3307 
3308 	r = amdgpu_pm_sysfs_init(adev);
3309 	if (r) {
3310 		adev->pm_sysfs_en = false;
3311 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3312 	} else
3313 		adev->pm_sysfs_en = true;
3314 
3315 	r = amdgpu_ucode_sysfs_init(adev);
3316 	if (r) {
3317 		adev->ucode_sysfs_en = false;
3318 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3319 	} else
3320 		adev->ucode_sysfs_en = true;
3321 
3322 	if ((amdgpu_testing & 1)) {
3323 		if (adev->accel_working)
3324 			amdgpu_test_moves(adev);
3325 		else
3326 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3327 	}
3328 	if (amdgpu_benchmarking) {
3329 		if (adev->accel_working)
3330 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3331 		else
3332 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3333 	}
3334 
3335 	/*
3336 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3337 	 * Otherwise the mgpu fan boost feature will be skipped due to the
	 * Otherwise the mgpu fan boost feature will be skipped because the
	 * gpu instance count would be too low.
3340 	amdgpu_register_gpu_instance(adev);
3341 
3342 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3343 	 * explicit gating rather than handling it automatically.
3344 	 */
3345 	r = amdgpu_device_ip_late_init(adev);
3346 	if (r) {
3347 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3348 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3349 		goto failed;
3350 	}
3351 
3352 	/* must succeed. */
3353 	amdgpu_ras_resume(adev);
3354 
3355 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3356 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3357 
3358 	if (amdgpu_sriov_vf(adev))
3359 		flush_delayed_work(&adev->delayed_init_work);
3360 
3361 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3362 	if (r) {
3363 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3364 		return r;
3365 	}
3366 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3371 
3372 	return 0;
3373 
3374 failed:
3375 	amdgpu_vf_error_trans_all(adev);
3376 	if (boco)
3377 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3378 
3379 	return r;
3380 }
3381 
3382 /**
3383  * amdgpu_device_fini - tear down the driver
3384  *
3385  * @adev: amdgpu_device pointer
3386  *
3387  * Tear down the driver info (all asics).
3388  * Called at driver shutdown.
3389  */
3390 void amdgpu_device_fini(struct amdgpu_device *adev)
3391 {
3392 	dev_info(adev->dev, "amdgpu: finishing device.\n");
3393 	flush_delayed_work(&adev->delayed_init_work);
3394 	adev->shutdown = true;
3395 
	/* make sure IB tests are finished before entering exclusive mode
	 * to avoid preemption on the IB tests
	 */
3399 	if (amdgpu_sriov_vf(adev))
3400 		amdgpu_virt_request_full_gpu(adev, false);
3401 
3402 	/* disable all interrupts */
3403 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
3405 		if (!amdgpu_device_has_dc_support(adev))
3406 			drm_helper_force_disable_all(adev_to_drm(adev));
3407 		else
3408 			drm_atomic_helper_shutdown(adev_to_drm(adev));
3409 	}
3410 	amdgpu_fence_driver_fini(adev);
3411 	if (adev->pm_sysfs_en)
3412 		amdgpu_pm_sysfs_fini(adev);
3413 	amdgpu_fbdev_fini(adev);
3414 	amdgpu_device_ip_fini(adev);
3415 	release_firmware(adev->firmware.gpu_info_fw);
3416 	adev->firmware.gpu_info_fw = NULL;
3417 	adev->accel_working = false;
3418 	/* free i2c buses */
3419 	if (!amdgpu_device_has_dc_support(adev))
3420 		amdgpu_i2c_fini(adev);
3421 
3422 	if (amdgpu_emu_mode != 1)
3423 		amdgpu_atombios_fini(adev);
3424 
3425 	kfree(adev->bios);
3426 	adev->bios = NULL;
3427 	if (amdgpu_has_atpx() &&
3428 	    (amdgpu_is_atpx_hybrid() ||
3429 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3430 	    !pci_is_thunderbolt_attached(adev->pdev))
3431 		vga_switcheroo_unregister_client(adev->pdev);
3432 	if (amdgpu_device_supports_boco(adev_to_drm(adev)))
3433 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3434 	vga_client_register(adev->pdev, NULL, NULL, NULL);
3435 	if (adev->rio_mem)
3436 		pci_iounmap(adev->pdev, adev->rio_mem);
3437 	adev->rio_mem = NULL;
3438 	iounmap(adev->rmmio);
3439 	adev->rmmio = NULL;
3440 	amdgpu_device_doorbell_fini(adev);
3441 
3442 	if (adev->ucode_sysfs_en)
3443 		amdgpu_ucode_sysfs_fini(adev);
3444 
3445 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3446 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3447 		amdgpu_pmu_fini(adev);
3448 	if (adev->mman.discovery_bin)
3449 		amdgpu_discovery_fini(adev);
3450 }
3451 
3452 
3453 /*
3454  * Suspend & resume.
3455  */
3456 /**
3457  * amdgpu_device_suspend - initiate device suspend
3458  *
3459  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
3461  *
3462  * Puts the hw in the suspend state (all asics).
3463  * Returns 0 for success or an error on failure.
3464  * Called at driver suspend.
3465  */
3466 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3467 {
3468 	struct amdgpu_device *adev;
3469 	struct drm_crtc *crtc;
3470 	struct drm_connector *connector;
3471 	struct drm_connector_list_iter iter;
3472 	int r;
3473 
3474 	adev = drm_to_adev(dev);
3475 
3476 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3477 		return 0;
3478 
3479 	adev->in_suspend = true;
3480 	drm_kms_helper_poll_disable(dev);
3481 
3482 	if (fbcon)
3483 		amdgpu_fbdev_set_suspend(adev, 1);
3484 
3485 	cancel_delayed_work_sync(&adev->delayed_init_work);
3486 
3487 	if (!amdgpu_device_has_dc_support(adev)) {
3488 		/* turn off display hw */
3489 		drm_modeset_lock_all(dev);
3490 		drm_connector_list_iter_begin(dev, &iter);
3491 		drm_for_each_connector_iter(connector, &iter)
3492 			drm_helper_connector_dpms(connector,
3493 						  DRM_MODE_DPMS_OFF);
3494 		drm_connector_list_iter_end(&iter);
3495 		drm_modeset_unlock_all(dev);
		/* unpin the front buffers and cursors */
3497 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3498 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3499 			struct drm_framebuffer *fb = crtc->primary->fb;
3500 			struct amdgpu_bo *robj;
3501 
3502 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3503 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3504 				r = amdgpu_bo_reserve(aobj, true);
3505 				if (r == 0) {
3506 					amdgpu_bo_unpin(aobj);
3507 					amdgpu_bo_unreserve(aobj);
3508 				}
3509 			}
3510 
			if (fb == NULL || fb->obj[0] == NULL)
				continue;
3514 			robj = gem_to_amdgpu_bo(fb->obj[0]);
3515 			/* don't unpin kernel fb objects */
3516 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3517 				r = amdgpu_bo_reserve(robj, true);
3518 				if (r == 0) {
3519 					amdgpu_bo_unpin(robj);
3520 					amdgpu_bo_unreserve(robj);
3521 				}
3522 			}
3523 		}
3524 	}
3525 
3526 	amdgpu_ras_suspend(adev);
3527 
3528 	r = amdgpu_device_ip_suspend_phase1(adev);
3529 
3530 	amdgpu_amdkfd_suspend(adev, !fbcon);
3531 
3532 	/* evict vram memory */
3533 	amdgpu_bo_evict_vram(adev);
3534 
3535 	amdgpu_fence_driver_suspend(adev);
3536 
3537 	r = amdgpu_device_ip_suspend_phase2(adev);
3538 
3539 	/* evict remaining vram memory
3540 	 * This second call to evict vram is to evict the gart page table
3541 	 * using the CPU.
3542 	 */
3543 	amdgpu_bo_evict_vram(adev);
3544 
3545 	return 0;
3546 }
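
/*
 * Illustrative sketch (assumption, not part of this file): a system PM
 * suspend callback in the driver glue is expected to forward to
 * amdgpu_device_suspend() roughly as below.  The handler name and its
 * wiring into dev_pm_ops are hypothetical and shown for illustration only.
 *
 *	static int example_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 */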
3547 
3548 /**
3549  * amdgpu_device_resume - initiate device resume
3550  *
3551  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
3553  *
3554  * Bring the hw back to operating state (all asics).
3555  * Returns 0 for success or an error on failure.
3556  * Called at driver resume.
3557  */
3558 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3559 {
3560 	struct drm_connector *connector;
3561 	struct drm_connector_list_iter iter;
3562 	struct amdgpu_device *adev = drm_to_adev(dev);
3563 	struct drm_crtc *crtc;
3564 	int r = 0;
3565 
3566 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3567 		return 0;
3568 
3569 	/* post card */
3570 	if (amdgpu_device_need_post(adev)) {
3571 		r = amdgpu_device_asic_init(adev);
3572 		if (r)
3573 			dev_err(adev->dev, "amdgpu asic init failed\n");
3574 	}
3575 
3576 	r = amdgpu_device_ip_resume(adev);
3577 	if (r) {
3578 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3579 		return r;
3580 	}
3581 	amdgpu_fence_driver_resume(adev);
3582 
3583 
3584 	r = amdgpu_device_ip_late_init(adev);
3585 	if (r)
3586 		return r;
3587 
3588 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3589 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3590 
3591 	if (!amdgpu_device_has_dc_support(adev)) {
3592 		/* pin cursors */
3593 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3594 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3595 
3596 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3597 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3598 				r = amdgpu_bo_reserve(aobj, true);
3599 				if (r == 0) {
3600 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3601 					if (r != 0)
3602 						dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
3603 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3604 					amdgpu_bo_unreserve(aobj);
3605 				}
3606 			}
3607 		}
3608 	}
3609 	r = amdgpu_amdkfd_resume(adev, !fbcon);
3610 	if (r)
3611 		return r;
3612 
3613 	/* Make sure IB tests flushed */
3614 	flush_delayed_work(&adev->delayed_init_work);
3615 
3616 	/* blat the mode back in */
3617 	if (fbcon) {
3618 		if (!amdgpu_device_has_dc_support(adev)) {
3619 			/* pre DCE11 */
3620 			drm_helper_resume_force_mode(dev);
3621 
3622 			/* turn on display hw */
3623 			drm_modeset_lock_all(dev);
3624 
3625 			drm_connector_list_iter_begin(dev, &iter);
3626 			drm_for_each_connector_iter(connector, &iter)
3627 				drm_helper_connector_dpms(connector,
3628 							  DRM_MODE_DPMS_ON);
3629 			drm_connector_list_iter_end(&iter);
3630 
3631 			drm_modeset_unlock_all(dev);
3632 		}
3633 		amdgpu_fbdev_set_suspend(adev, 0);
3634 	}
3635 
3636 	drm_kms_helper_poll_enable(dev);
3637 
3638 	amdgpu_ras_resume(adev);
3639 
3640 	/*
3641 	 * Most of the connector probing functions try to acquire runtime pm
3642 	 * refs to ensure that the GPU is powered on when connector polling is
3643 	 * performed. Since we're calling this from a runtime PM callback,
3644 	 * trying to acquire rpm refs will cause us to deadlock.
3645 	 *
3646 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3647 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3648 	 */
3649 #ifdef CONFIG_PM
3650 	dev->dev->power.disable_depth++;
3651 #endif
3652 	if (!amdgpu_device_has_dc_support(adev))
3653 		drm_helper_hpd_irq_event(dev);
3654 	else
3655 		drm_kms_helper_hotplug_event(dev);
3656 #ifdef CONFIG_PM
3657 	dev->dev->power.disable_depth--;
3658 #endif
3659 	adev->in_suspend = false;
3660 
3661 	return 0;
3662 }
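
/*
 * Illustrative sketch (assumption, not part of this file): the matching
 * system PM resume callback would call amdgpu_device_resume() with fbcon
 * enabled so the fbdev console is unblanked again.  The handler name is
 * hypothetical.
 *
 *	static int example_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */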
3663 
3664 /**
3665  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3666  *
3667  * @adev: amdgpu_device pointer
3668  *
3669  * The list of all the hardware IPs that make up the asic is walked and
3670  * the check_soft_reset callbacks are run.  check_soft_reset determines
3671  * if the asic is still hung or not.
3672  * Returns true if any of the IPs are still in a hung state, false if not.
3673  */
3674 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3675 {
3676 	int i;
3677 	bool asic_hang = false;
3678 
3679 	if (amdgpu_sriov_vf(adev))
3680 		return true;
3681 
3682 	if (amdgpu_asic_need_full_reset(adev))
3683 		return true;
3684 
3685 	for (i = 0; i < adev->num_ip_blocks; i++) {
3686 		if (!adev->ip_blocks[i].status.valid)
3687 			continue;
3688 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3689 			adev->ip_blocks[i].status.hang =
3690 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3691 		if (adev->ip_blocks[i].status.hang) {
3692 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3693 			asic_hang = true;
3694 		}
3695 	}
3696 	return asic_hang;
3697 }
3698 
3699 /**
3700  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3701  *
3702  * @adev: amdgpu_device pointer
3703  *
3704  * The list of all the hardware IPs that make up the asic is walked and the
3705  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3706  * handles any IP specific hardware or software state changes that are
3707  * necessary for a soft reset to succeed.
3708  * Returns 0 on success, negative error code on failure.
3709  */
3710 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3711 {
3712 	int i, r = 0;
3713 
3714 	for (i = 0; i < adev->num_ip_blocks; i++) {
3715 		if (!adev->ip_blocks[i].status.valid)
3716 			continue;
3717 		if (adev->ip_blocks[i].status.hang &&
3718 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3719 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3720 			if (r)
3721 				return r;
3722 		}
3723 	}
3724 
3725 	return 0;
3726 }
3727 
3728 /**
3729  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3730  *
3731  * @adev: amdgpu_device pointer
3732  *
3733  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3734  * reset is necessary to recover.
3735  * Returns true if a full asic reset is required, false if not.
3736  */
3737 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3738 {
3739 	int i;
3740 
3741 	if (amdgpu_asic_need_full_reset(adev))
3742 		return true;
3743 
3744 	for (i = 0; i < adev->num_ip_blocks; i++) {
3745 		if (!adev->ip_blocks[i].status.valid)
3746 			continue;
3747 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3748 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3749 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3750 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3751 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3752 			if (adev->ip_blocks[i].status.hang) {
				dev_info(adev->dev, "Some blocks need a full reset!\n");
3754 				return true;
3755 			}
3756 		}
3757 	}
3758 	return false;
3759 }
3760 
3761 /**
3762  * amdgpu_device_ip_soft_reset - do a soft reset
3763  *
3764  * @adev: amdgpu_device pointer
3765  *
3766  * The list of all the hardware IPs that make up the asic is walked and the
3767  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3768  * IP specific hardware or software state changes that are necessary to soft
3769  * reset the IP.
3770  * Returns 0 on success, negative error code on failure.
3771  */
3772 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3773 {
3774 	int i, r = 0;
3775 
3776 	for (i = 0; i < adev->num_ip_blocks; i++) {
3777 		if (!adev->ip_blocks[i].status.valid)
3778 			continue;
3779 		if (adev->ip_blocks[i].status.hang &&
3780 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3781 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3782 			if (r)
3783 				return r;
3784 		}
3785 	}
3786 
3787 	return 0;
3788 }
3789 
3790 /**
3791  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3792  *
3793  * @adev: amdgpu_device pointer
3794  *
3795  * The list of all the hardware IPs that make up the asic is walked and the
3796  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3797  * handles any IP specific hardware or software state changes that are
3798  * necessary after the IP has been soft reset.
3799  * Returns 0 on success, negative error code on failure.
3800  */
3801 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3802 {
3803 	int i, r = 0;
3804 
3805 	for (i = 0; i < adev->num_ip_blocks; i++) {
3806 		if (!adev->ip_blocks[i].status.valid)
3807 			continue;
3808 		if (adev->ip_blocks[i].status.hang &&
3809 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3810 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3811 		if (r)
3812 			return r;
3813 	}
3814 
3815 	return 0;
3816 }
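
/*
 * Illustrative sketch (assumption): an IP block opts into the soft-reset
 * flow implemented by the helpers above by filling in the corresponding
 * amd_ip_funcs callbacks.  The example callbacks below are hypothetical;
 * the device-level helpers invoke them in the order check_soft_reset ->
 * pre_soft_reset -> soft_reset -> post_soft_reset.
 *
 *	static const struct amd_ip_funcs example_ip_funcs = {
 *		.name			= "example",
 *		.check_soft_reset	= example_check_soft_reset,
 *		.pre_soft_reset		= example_pre_soft_reset,
 *		.soft_reset		= example_soft_reset,
 *		.post_soft_reset	= example_post_soft_reset,
 *	};
 */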
3817 
3818 /**
3819  * amdgpu_device_recover_vram - Recover some VRAM contents
3820  *
3821  * @adev: amdgpu_device pointer
3822  *
3823  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3824  * restore things like GPUVM page tables after a GPU reset where
3825  * the contents of VRAM might be lost.
3826  *
3827  * Returns:
3828  * 0 on success, negative error code on failure.
3829  */
3830 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3831 {
3832 	struct dma_fence *fence = NULL, *next = NULL;
3833 	struct amdgpu_bo *shadow;
3834 	long r = 1, tmo;
3835 
3836 	if (amdgpu_sriov_runtime(adev))
3837 		tmo = msecs_to_jiffies(8000);
3838 	else
3839 		tmo = msecs_to_jiffies(100);
3840 
3841 	dev_info(adev->dev, "recover vram bo from shadow start\n");
3842 	mutex_lock(&adev->shadow_list_lock);
3843 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3844 
3845 		/* No need to recover an evicted BO */
3846 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3847 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3848 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3849 			continue;
3850 
3851 		r = amdgpu_bo_restore_shadow(shadow, &next);
3852 		if (r)
3853 			break;
3854 
3855 		if (fence) {
3856 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3857 			dma_fence_put(fence);
3858 			fence = next;
3859 			if (tmo == 0) {
3860 				r = -ETIMEDOUT;
3861 				break;
3862 			} else if (tmo < 0) {
3863 				r = tmo;
3864 				break;
3865 			}
3866 		} else {
3867 			fence = next;
3868 		}
3869 	}
3870 	mutex_unlock(&adev->shadow_list_lock);
3871 
3872 	if (fence)
3873 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3874 	dma_fence_put(fence);
3875 
3876 	if (r < 0 || tmo <= 0) {
3877 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3878 		return -EIO;
3879 	}
3880 
3881 	dev_info(adev->dev, "recover vram bo from shadow done\n");
3882 	return 0;
3883 }
3884 
3885 
3886 /**
3887  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3888  *
3889  * @adev: amdgpu device pointer
3890  * @from_hypervisor: request from hypervisor
3891  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
3894  */
3895 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3896 				     bool from_hypervisor)
3897 {
3898 	int r;
3899 
3900 	if (from_hypervisor)
3901 		r = amdgpu_virt_request_full_gpu(adev, true);
3902 	else
3903 		r = amdgpu_virt_reset_gpu(adev);
3904 	if (r)
3905 		return r;
3906 
3907 	amdgpu_amdkfd_pre_reset(adev);
3908 
3909 	/* Resume IP prior to SMC */
3910 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3911 	if (r)
3912 		goto error;
3913 
3914 	amdgpu_virt_init_data_exchange(adev);
	/* we need to recover gart prior to running SMC/CP/SDMA resume */
3916 	amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
3917 
3918 	r = amdgpu_device_fw_loading(adev);
3919 	if (r)
3920 		return r;
3921 
3922 	/* now we are okay to resume SMC/CP/SDMA */
3923 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3924 	if (r)
3925 		goto error;
3926 
3927 	amdgpu_irq_gpu_reset_resume_helper(adev);
3928 	r = amdgpu_ib_ring_tests(adev);
3929 	amdgpu_amdkfd_post_reset(adev);
3930 
3931 error:
3932 	amdgpu_virt_release_full_gpu(adev, true);
3933 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3934 		amdgpu_inc_vram_lost(adev);
3935 		r = amdgpu_device_recover_vram(adev);
3936 	}
3937 
3938 	return r;
3939 }
3940 
3941 /**
3942  * amdgpu_device_has_job_running - check if there is any job in mirror list
3943  *
3944  * @adev: amdgpu device pointer
3945  *
 * Check whether any job is still pending on any ring scheduler's mirror list.
3947  */
3948 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
3949 {
3950 	int i;
3951 	struct drm_sched_job *job;
3952 
3953 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3954 		struct amdgpu_ring *ring = adev->rings[i];
3955 
3956 		if (!ring || !ring->sched.thread)
3957 			continue;
3958 
3959 		spin_lock(&ring->sched.job_list_lock);
3960 		job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
3961 				struct drm_sched_job, node);
3962 		spin_unlock(&ring->sched.job_list_lock);
3963 		if (job)
3964 			return true;
3965 	}
3966 	return false;
3967 }
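
/*
 * Illustrative sketch (assumption): a caller that must not power down or
 * reset the device while work is still in flight can use the helper above
 * as a simple guard, e.g.
 *
 *	if (amdgpu_device_has_job_running(adev))
 *		return -EBUSY;
 */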
3968 
3969 /**
3970  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3971  *
3972  * @adev: amdgpu device pointer
3973  *
3974  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3975  * a hung GPU.
3976  */
3977 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3978 {
3979 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3980 		dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
3981 		return false;
3982 	}
3983 
3984 	if (amdgpu_gpu_recovery == 0)
3985 		goto disabled;
3986 
3987 	if (amdgpu_sriov_vf(adev))
3988 		return true;
3989 
3990 	if (amdgpu_gpu_recovery == -1) {
3991 		switch (adev->asic_type) {
3992 		case CHIP_BONAIRE:
3993 		case CHIP_HAWAII:
3994 		case CHIP_TOPAZ:
3995 		case CHIP_TONGA:
3996 		case CHIP_FIJI:
3997 		case CHIP_POLARIS10:
3998 		case CHIP_POLARIS11:
3999 		case CHIP_POLARIS12:
4000 		case CHIP_VEGAM:
4001 		case CHIP_VEGA20:
4002 		case CHIP_VEGA10:
4003 		case CHIP_VEGA12:
4004 		case CHIP_RAVEN:
4005 		case CHIP_ARCTURUS:
4006 		case CHIP_RENOIR:
4007 		case CHIP_NAVI10:
4008 		case CHIP_NAVI14:
4009 		case CHIP_NAVI12:
4010 		case CHIP_SIENNA_CICHLID:
4011 			break;
4012 		default:
4013 			goto disabled;
4014 		}
4015 	}
4016 
4017 	return true;
4018 
4019 disabled:
	dev_info(adev->dev, "GPU recovery disabled.\n");
	return false;
4022 }
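
/*
 * Note: the decision above is driven by the amdgpu_gpu_recovery module
 * parameter (-1 = auto/per-ASIC default, 0 = disabled, 1 = enabled).  For
 * example, recovery can be forced on from the kernel command line with
 * amdgpu.gpu_recovery=1.
 */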
4023 
4024 
4025 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4026 					struct amdgpu_job *job,
4027 					bool *need_full_reset_arg)
4028 {
4029 	int i, r = 0;
4030 	bool need_full_reset  = *need_full_reset_arg;
4031 
4032 	amdgpu_debugfs_wait_dump(adev);
4033 
4034 	/* block all schedulers and reset given job's ring */
4035 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4036 		struct amdgpu_ring *ring = adev->rings[i];
4037 
4038 		if (!ring || !ring->sched.thread)
4039 			continue;
4040 
4041 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4042 		amdgpu_fence_driver_force_completion(ring);
4043 	}
4044 
	if (job)
4046 		drm_sched_increase_karma(&job->base);
4047 
4048 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4049 	if (!amdgpu_sriov_vf(adev)) {
4050 
4051 		if (!need_full_reset)
4052 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4053 
4054 		if (!need_full_reset) {
4055 			amdgpu_device_ip_pre_soft_reset(adev);
4056 			r = amdgpu_device_ip_soft_reset(adev);
4057 			amdgpu_device_ip_post_soft_reset(adev);
4058 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4059 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4060 				need_full_reset = true;
4061 			}
4062 		}
4063 
4064 		if (need_full_reset)
4065 			r = amdgpu_device_ip_suspend(adev);
4066 
4067 		*need_full_reset_arg = need_full_reset;
4068 	}
4069 
4070 	return r;
4071 }
4072 
4073 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
4074 			       struct list_head *device_list_handle,
4075 			       bool *need_full_reset_arg)
4076 {
4077 	struct amdgpu_device *tmp_adev = NULL;
4078 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4079 	int r = 0;
4080 
	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper link negotiation in FW (within 1 sec)
	 */
4085 	if (need_full_reset) {
4086 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4087 			/* For XGMI run all resets in parallel to speed up the process */
4088 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4089 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4090 					r = -EALREADY;
4091 			} else
4092 				r = amdgpu_asic_reset(tmp_adev);
4093 
4094 			if (r) {
4095 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4096 					 r, adev_to_drm(tmp_adev)->unique);
4097 				break;
4098 			}
4099 		}
4100 
4101 		/* For XGMI wait for all resets to complete before proceed */
4102 		if (!r) {
4103 			list_for_each_entry(tmp_adev, device_list_handle,
4104 					    gmc.xgmi.head) {
4105 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4106 					flush_work(&tmp_adev->xgmi_reset_work);
4107 					r = tmp_adev->asic_reset_res;
4108 					if (r)
4109 						break;
4110 				}
4111 			}
4112 		}
4113 	}
4114 
4115 	if (!r && amdgpu_ras_intr_triggered()) {
4116 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4117 			if (tmp_adev->mmhub.funcs &&
4118 			    tmp_adev->mmhub.funcs->reset_ras_error_count)
4119 				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4120 		}
4121 
4122 		amdgpu_ras_intr_cleared();
4123 	}
4124 
4125 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4126 		if (need_full_reset) {
4127 			/* post card */
4128 			if (amdgpu_device_asic_init(tmp_adev))
4129 				dev_warn(tmp_adev->dev, "asic atom init failed!");
4130 
4131 			if (!r) {
4132 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4133 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4134 				if (r)
4135 					goto out;
4136 
4137 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4138 				if (vram_lost) {
4139 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4140 					amdgpu_inc_vram_lost(tmp_adev);
4141 				}
4142 
4143 				r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
4144 				if (r)
4145 					goto out;
4146 
4147 				r = amdgpu_device_fw_loading(tmp_adev);
4148 				if (r)
4149 					return r;
4150 
4151 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4152 				if (r)
4153 					goto out;
4154 
4155 				if (vram_lost)
4156 					amdgpu_device_fill_reset_magic(tmp_adev);
4157 
				/*
				 * Add this ASIC as tracked, since the reset
				 * has already completed successfully.
				 */
4162 				amdgpu_register_gpu_instance(tmp_adev);
4163 
4164 				r = amdgpu_device_ip_late_init(tmp_adev);
4165 				if (r)
4166 					goto out;
4167 
4168 				amdgpu_fbdev_set_suspend(tmp_adev, 0);
4169 
				/*
				 * The GPU enters a bad state once the number
				 * of faulty pages flagged by ECC reaches the
				 * threshold, and RAS recovery is scheduled
				 * next.  Check here and abort recovery if the
				 * bad page threshold has indeed been exceeded,
				 * reminding the user to either retire this GPU
				 * or set a larger bad_page_threshold the next
				 * time the driver is probed.
				 */
4180 				if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
4181 					/* must succeed. */
4182 					amdgpu_ras_resume(tmp_adev);
4183 				} else {
4184 					r = -EINVAL;
4185 					goto out;
4186 				}
4187 
4188 				/* Update PSP FW topology after reset */
4189 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4190 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4191 			}
4192 		}
4193 
4194 out:
4195 		if (!r) {
4196 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4197 			r = amdgpu_ib_ring_tests(tmp_adev);
4198 			if (r) {
4199 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4200 				r = amdgpu_device_ip_suspend(tmp_adev);
4201 				need_full_reset = true;
4202 				r = -EAGAIN;
4203 				goto end;
4204 			}
4205 		}
4206 
4207 		if (!r)
4208 			r = amdgpu_device_recover_vram(tmp_adev);
4209 		else
4210 			tmp_adev->asic_reset_res = r;
4211 	}
4212 
4213 end:
4214 	*need_full_reset_arg = need_full_reset;
4215 	return r;
4216 }
4217 
4218 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4219 				struct amdgpu_hive_info *hive)
4220 {
4221 	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4222 		return false;
4223 
4224 	if (hive) {
4225 		down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4226 	} else {
4227 		down_write(&adev->reset_sem);
4228 	}
4229 
4230 	atomic_inc(&adev->gpu_reset_counter);
4231 	switch (amdgpu_asic_reset_method(adev)) {
4232 	case AMD_RESET_METHOD_MODE1:
4233 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4234 		break;
4235 	case AMD_RESET_METHOD_MODE2:
4236 		adev->mp1_state = PP_MP1_STATE_RESET;
4237 		break;
4238 	default:
4239 		adev->mp1_state = PP_MP1_STATE_NONE;
4240 		break;
4241 	}
4242 
4243 	return true;
4244 }
4245 
4246 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4247 {
4248 	amdgpu_vf_error_trans_all(adev);
4249 	adev->mp1_state = PP_MP1_STATE_NONE;
4250 	atomic_set(&adev->in_gpu_reset, 0);
4251 	up_write(&adev->reset_sem);
4252 }
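
/*
 * Illustrative sketch (assumption, simplified): the two helpers above
 * bracket the per-device reset work, e.g.
 *
 *	if (!amdgpu_device_lock_adev(adev, hive))
 *		return 0;	(another reset is already in flight)
 *
 *	... perform the per-device reset work ...
 *
 *	amdgpu_device_unlock_adev(adev);
 */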
4253 
4254 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4255 {
4256 	struct pci_dev *p = NULL;
4257 
4258 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4259 			adev->pdev->bus->number, 1);
4260 	if (p) {
4261 		pm_runtime_enable(&(p->dev));
4262 		pm_runtime_resume(&(p->dev));
4263 	}
4264 }
4265 
4266 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4267 {
4268 	enum amd_reset_method reset_method;
4269 	struct pci_dev *p = NULL;
4270 	u64 expires;
4271 
	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer from the audio issue if the audio device
	 * is not properly suspended.
	 */
4276 	reset_method = amdgpu_asic_reset_method(adev);
4277 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
4278 	     (reset_method != AMD_RESET_METHOD_MODE1))
4279 		return -EINVAL;
4280 
4281 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4282 			adev->pdev->bus->number, 1);
4283 	if (!p)
4284 		return -ENODEV;
4285 
4286 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
4287 	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4s interval is used.  Since 3s is the audio
		 * controller's default autosuspend delay, 4s is
		 * guaranteed to cover it.
		 */
4294 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4295 
4296 	while (!pm_runtime_status_suspended(&(p->dev))) {
4297 		if (!pm_runtime_suspend(&(p->dev)))
4298 			break;
4299 
4300 		if (expires < ktime_get_mono_fast_ns()) {
4301 			dev_warn(adev->dev, "failed to suspend display audio\n");
4302 			/* TODO: abort the succeeding gpu reset? */
4303 			return -ETIMEDOUT;
4304 		}
4305 	}
4306 
4307 	pm_runtime_disable(&(p->dev));
4308 
4309 	return 0;
4310 }
4311 
4312 /**
4313  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4314  *
4315  * @adev: amdgpu device pointer
 * @job: which job triggered the hang
4317  *
4318  * Attempt to reset the GPU if it has hung (all asics).
 * Attempt a soft reset or a full reset and reinitialize the ASIC.
4320  * Returns 0 for success or an error on failure.
4321  */
4323 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4324 			      struct amdgpu_job *job)
4325 {
4326 	struct list_head device_list, *device_list_handle =  NULL;
4327 	bool need_full_reset = false;
4328 	bool job_signaled = false;
4329 	struct amdgpu_hive_info *hive = NULL;
4330 	struct amdgpu_device *tmp_adev = NULL;
4331 	int i, r = 0;
4332 	bool need_emergency_restart = false;
4333 	bool audio_suspended = false;
4334 
	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
4338 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4339 
4340 	/*
4341 	 * Flush RAM to disk so that after reboot
4342 	 * the user can read log and see why the system rebooted.
4343 	 */
4344 	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
4345 		DRM_WARN("Emergency reboot.");
4346 
4347 		ksys_sync_helper();
4348 		emergency_restart();
4349 	}
4350 
4351 	dev_info(adev->dev, "GPU %s begin!\n",
4352 		need_emergency_restart ? "jobs stop":"reset");
4353 
	/*
	 * Here we trylock to avoid a chain of resets executing while this
	 * timeout handler is running, whether triggered by jobs on different
	 * adevs in an XGMI hive or by jobs on different schedulers of the
	 * same device.  We always reset all schedulers for a device and all
	 * devices in an XGMI hive, so that should take care of them too.
	 */
4361 	hive = amdgpu_get_xgmi_hive(adev);
4362 	if (hive) {
4363 		if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4364 			DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4365 				job ? job->base.id : -1, hive->hive_id);
4366 			amdgpu_put_xgmi_hive(hive);
4367 			return 0;
4368 		}
4369 		mutex_lock(&hive->hive_lock);
4370 	}
4371 
4372 	/*
4373 	 * Build list of devices to reset.
4374 	 * In case we are in XGMI hive mode, resort the device list
4375 	 * to put adev in the 1st position.
4376 	 */
4377 	INIT_LIST_HEAD(&device_list);
4378 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4379 		if (!hive)
4380 			return -ENODEV;
4381 		if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4382 			list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4383 		device_list_handle = &hive->device_list;
4384 	} else {
4385 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
4386 		device_list_handle = &device_list;
4387 	}
4388 
4389 	/* block all schedulers and reset given job's ring */
4390 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4391 		if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
4392 			dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4393 				  job ? job->base.id : -1);
4394 			r = 0;
4395 			goto skip_recovery;
4396 		}
4397 
		/*
		 * Try to put the audio codec into the suspend state
		 * before the GPU reset starts.
		 *
		 * Because the power domain of the graphics device is
		 * shared with the AZ power domain, skipping this could
		 * change the audio hardware behind the audio driver's
		 * back and trigger audio codec errors.
		 */
4408 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
4409 			audio_suspended = true;
4410 
4411 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
4412 
4413 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4414 
4415 		if (!amdgpu_sriov_vf(tmp_adev))
4416 			amdgpu_amdkfd_pre_reset(tmp_adev);
4417 
		/*
		 * Mark these ASICs as untracked first before the reset,
		 * and add them back after the reset has completed.
		 */
4422 		amdgpu_unregister_gpu_instance(tmp_adev);
4423 
4424 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
4425 
4426 		/* disable ras on ALL IPs */
4427 		if (!need_emergency_restart &&
4428 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4429 			amdgpu_ras_suspend(tmp_adev);
4430 
4431 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4432 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4433 
4434 			if (!ring || !ring->sched.thread)
4435 				continue;
4436 
4437 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4438 
4439 			if (need_emergency_restart)
4440 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4441 		}
4442 	}
4443 
4444 	if (need_emergency_restart)
4445 		goto skip_sched_resume;
4446 
4447 	/*
4448 	 * Must check guilty signal here since after this point all old
4449 	 * HW fences are force signaled.
4450 	 *
4451 	 * job->base holds a reference to parent fence
4452 	 */
4453 	if (job && job->base.s_fence->parent &&
4454 	    dma_fence_is_signaled(job->base.s_fence->parent)) {
4455 		job_signaled = true;
4456 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4457 		goto skip_hw_reset;
4458 	}
4459 
4460 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4461 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4462 		r = amdgpu_device_pre_asic_reset(tmp_adev,
4463 						 NULL,
4464 						 &need_full_reset);
		/* TODO: should we stop? */
4466 		if (r) {
4467 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4468 				  r, adev_to_drm(tmp_adev)->unique);
4469 			tmp_adev->asic_reset_res = r;
4470 		}
4471 	}
4472 
	/* Actual ASIC resets if needed. */
	/* TODO: implement XGMI hive reset logic for SRIOV */
4475 	if (amdgpu_sriov_vf(adev)) {
4476 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4477 		if (r)
4478 			adev->asic_reset_res = r;
4479 	} else {
4480 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r == -EAGAIN)
4482 			goto retry;
4483 	}
4484 
4485 skip_hw_reset:
4486 
	/* Post ASIC reset for all devs. */
4488 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4489 
4490 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4491 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4492 
4493 			if (!ring || !ring->sched.thread)
4494 				continue;
4495 
			/* No point in resubmitting jobs if we didn't HW reset */
4497 			if (!tmp_adev->asic_reset_res && !job_signaled)
4498 				drm_sched_resubmit_jobs(&ring->sched);
4499 
4500 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4501 		}
4502 
		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
4506 
4507 		tmp_adev->asic_reset_res = 0;
4508 
4509 		if (r) {
4510 			/* bad news, how to tell it to userspace ? */
4511 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4512 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4513 		} else {
4514 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4515 		}
4516 	}
4517 
4518 skip_sched_resume:
4519 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
4523 		if (audio_suspended)
4524 			amdgpu_device_resume_display_audio(tmp_adev);
4525 		amdgpu_device_unlock_adev(tmp_adev);
4526 	}
4527 
4528 skip_recovery:
4529 	if (hive) {
4530 		atomic_set(&hive->in_reset, 0);
4531 		mutex_unlock(&hive->hive_lock);
4532 		amdgpu_put_xgmi_hive(hive);
4533 	}
4534 
4535 	if (r)
4536 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4537 	return r;
4538 }
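
/*
 * Illustrative sketch (assumption, simplified): a ring's job timeout
 * handler is expected to gate entry into this recovery path on
 * amdgpu_device_should_recover_gpu(), roughly:
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *	else
 *		drm_sched_suspend_timeout(&ring->sched);
 */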
4539 
4540 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
4546  * and lanes) of the slot the device is in. Handles APUs and
4547  * virtualized environments where PCIE config space may not be available.
4548  */
4549 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4550 {
4551 	struct pci_dev *pdev;
4552 	enum pci_bus_speed speed_cap, platform_speed_cap;
4553 	enum pcie_link_width platform_link_width;
4554 
4555 	if (amdgpu_pcie_gen_cap)
4556 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4557 
4558 	if (amdgpu_pcie_lane_cap)
4559 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4560 
4561 	/* covers APUs as well */
4562 	if (pci_is_root_bus(adev->pdev->bus)) {
4563 		if (adev->pm.pcie_gen_mask == 0)
4564 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4565 		if (adev->pm.pcie_mlw_mask == 0)
4566 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4567 		return;
4568 	}
4569 
4570 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4571 		return;
4572 
4573 	pcie_bandwidth_available(adev->pdev, NULL,
4574 				 &platform_speed_cap, &platform_link_width);
4575 
4576 	if (adev->pm.pcie_gen_mask == 0) {
4577 		/* asic caps */
4578 		pdev = adev->pdev;
4579 		speed_cap = pcie_get_speed_cap(pdev);
4580 		if (speed_cap == PCI_SPEED_UNKNOWN) {
4581 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4582 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4583 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4584 		} else {
4585 			if (speed_cap == PCIE_SPEED_16_0GT)
4586 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4587 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4588 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4589 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4590 			else if (speed_cap == PCIE_SPEED_8_0GT)
4591 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4592 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4593 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4594 			else if (speed_cap == PCIE_SPEED_5_0GT)
4595 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4596 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4597 			else
4598 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4599 		}
4600 		/* platform caps */
4601 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4602 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4603 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4604 		} else {
4605 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4606 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4607 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4608 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4609 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4610 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4611 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4612 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4613 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4614 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4615 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4616 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4617 			else
4618 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4619 
4620 		}
4621 	}
4622 	if (adev->pm.pcie_mlw_mask == 0) {
4623 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4624 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4625 		} else {
4626 			switch (platform_link_width) {
4627 			case PCIE_LNK_X32:
4628 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4629 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4630 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4631 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4632 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4633 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4634 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4635 				break;
4636 			case PCIE_LNK_X16:
4637 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4638 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4639 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4640 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4641 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4642 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4643 				break;
4644 			case PCIE_LNK_X12:
4645 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4646 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4647 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4648 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4649 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4650 				break;
4651 			case PCIE_LNK_X8:
4652 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4653 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4654 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4655 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4656 				break;
4657 			case PCIE_LNK_X4:
4658 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4659 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4660 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4661 				break;
4662 			case PCIE_LNK_X2:
4663 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4664 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4665 				break;
4666 			case PCIE_LNK_X1:
4667 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4668 				break;
4669 			default:
4670 				break;
4671 			}
4672 		}
4673 	}
4674 }
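
/*
 * Illustrative sketch (assumption): power-management code consumes the
 * masks cached above, for instance to decide whether a PCIe gen3 link
 * speed may be requested:
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *		... allow requesting a gen3 link speed ...
 */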
4675 
4676 int amdgpu_device_baco_enter(struct drm_device *dev)
4677 {
4678 	struct amdgpu_device *adev = drm_to_adev(dev);
4679 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4680 
4681 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
4682 		return -ENOTSUPP;
4683 
4684 	if (ras && ras->supported)
4685 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4686 
4687 	return amdgpu_dpm_baco_enter(adev);
4688 }
4689 
4690 int amdgpu_device_baco_exit(struct drm_device *dev)
4691 {
4692 	struct amdgpu_device *adev = drm_to_adev(dev);
4693 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4694 	int ret = 0;
4695 
4696 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
4697 		return -ENOTSUPP;
4698 
4699 	ret = amdgpu_dpm_baco_exit(adev);
4700 	if (ret)
4701 		return ret;
4702 
4703 	if (ras && ras->supported)
4704 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4705 
4706 	return 0;
4707 }
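
/*
 * Illustrative sketch (assumption, simplified): on BACO-capable boards the
 * runtime-PM path brackets the low-power state with the two helpers above,
 * roughly:
 *
 *	amdgpu_device_baco_enter(drm_dev);	during runtime suspend
 *	...device sits in BACO while runtime suspended...
 *	amdgpu_device_baco_exit(drm_dev);	during runtime resume
 */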
4708