xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 6ca7415f11af5200ab10bd420b513f846e9bfb99)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 
39 #include <drm/drm_aperture.h>
40 #include <drm/drm_atomic_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/vgaarb.h>
45 #include <linux/vga_switcheroo.h>
46 #include <linux/efi.h>
47 #include "amdgpu.h"
48 #include "amdgpu_trace.h"
49 #include "amdgpu_i2c.h"
50 #include "atom.h"
51 #include "amdgpu_atombios.h"
52 #include "amdgpu_atomfirmware.h"
53 #include "amd_pcie.h"
54 #ifdef CONFIG_DRM_AMDGPU_SI
55 #include "si.h"
56 #endif
57 #ifdef CONFIG_DRM_AMDGPU_CIK
58 #include "cik.h"
59 #endif
60 #include "vi.h"
61 #include "soc15.h"
62 #include "nv.h"
63 #include "bif/bif_4_1_d.h"
64 #include <linux/firmware.h>
65 #include "amdgpu_vf_error.h"
66 
67 #include "amdgpu_amdkfd.h"
68 #include "amdgpu_pm.h"
69 
70 #include "amdgpu_xgmi.h"
71 #include "amdgpu_ras.h"
72 #include "amdgpu_pmu.h"
73 #include "amdgpu_fru_eeprom.h"
74 #include "amdgpu_reset.h"
75 
76 #include <linux/suspend.h>
77 #include <drm/task_barrier.h>
78 #include <linux/pm_runtime.h>
79 
80 #include <drm/drm_drv.h>
81 
82 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
83 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
85 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
86 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
87 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
88 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
89 
90 #define AMDGPU_RESUME_MS		2000
91 #define AMDGPU_MAX_RETRY_LIMIT		2
92 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
93 
94 static const struct drm_driver amdgpu_kms_driver;
95 
96 const char *amdgpu_asic_name[] = {
97 	"TAHITI",
98 	"PITCAIRN",
99 	"VERDE",
100 	"OLAND",
101 	"HAINAN",
102 	"BONAIRE",
103 	"KAVERI",
104 	"KABINI",
105 	"HAWAII",
106 	"MULLINS",
107 	"TOPAZ",
108 	"TONGA",
109 	"FIJI",
110 	"CARRIZO",
111 	"STONEY",
112 	"POLARIS10",
113 	"POLARIS11",
114 	"POLARIS12",
115 	"VEGAM",
116 	"VEGA10",
117 	"VEGA12",
118 	"VEGA20",
119 	"RAVEN",
120 	"ARCTURUS",
121 	"RENOIR",
122 	"ALDEBARAN",
123 	"NAVI10",
124 	"CYAN_SKILLFISH",
125 	"NAVI14",
126 	"NAVI12",
127 	"SIENNA_CICHLID",
128 	"NAVY_FLOUNDER",
129 	"VANGOGH",
130 	"DIMGREY_CAVEFISH",
131 	"BEIGE_GOBY",
132 	"YELLOW_CARP",
133 	"IP DISCOVERY",
134 	"LAST",
135 };
136 
137 /**
138  * DOC: pcie_replay_count
139  *
140  * The amdgpu driver provides a sysfs API for reporting the total number
141  * of PCIe replays (NAKs)
142  * The file pcie_replay_count is used for this and returns the total
143  * number of replays as a sum of the NAKs generated and NAKs received
144  */
145 
146 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
147 		struct device_attribute *attr, char *buf)
148 {
149 	struct drm_device *ddev = dev_get_drvdata(dev);
150 	struct amdgpu_device *adev = drm_to_adev(ddev);
151 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
152 
153 	return sysfs_emit(buf, "%llu\n", cnt);
154 }
155 
156 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
157 		amdgpu_device_get_pcie_replay_count, NULL);
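
/*
 * Illustrative user-space sketch (not driver code): the attribute created
 * above appears in the device's sysfs directory, so assuming the GPU is
 * card0 the count can be read with ordinary file I/O (<stdio.h>), e.g.
 *
 *	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *	unsigned long long replays = 0;
 *
 *	if (f) {
 *		if (fscanf(f, "%llu", &replays) != 1)
 *			replays = 0;
 *		fclose(f);
 *	}
 */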
158 
159 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
160 
161 /**
162  * DOC: product_name
163  *
164  * The amdgpu driver provides a sysfs API for reporting the product name
165  * for the device.
166  * The file product_name is used for this and returns the product name
167  * as returned from the FRU.
168  * NOTE: This is only available for certain server cards
169  */
170 
171 static ssize_t amdgpu_device_get_product_name(struct device *dev,
172 		struct device_attribute *attr, char *buf)
173 {
174 	struct drm_device *ddev = dev_get_drvdata(dev);
175 	struct amdgpu_device *adev = drm_to_adev(ddev);
176 
177 	return sysfs_emit(buf, "%s\n", adev->product_name);
178 }
179 
180 static DEVICE_ATTR(product_name, S_IRUGO,
181 		amdgpu_device_get_product_name, NULL);
182 
183 /**
184  * DOC: product_number
185  *
186  * The amdgpu driver provides a sysfs API for reporting the part number
187  * for the device.
188  * The file product_number is used for this and returns the part number
189  * as returned from the FRU.
190  * NOTE: This is only available for certain server cards
191  */
192 
193 static ssize_t amdgpu_device_get_product_number(struct device *dev,
194 		struct device_attribute *attr, char *buf)
195 {
196 	struct drm_device *ddev = dev_get_drvdata(dev);
197 	struct amdgpu_device *adev = drm_to_adev(ddev);
198 
199 	return sysfs_emit(buf, "%s\n", adev->product_number);
200 }
201 
202 static DEVICE_ATTR(product_number, S_IRUGO,
203 		amdgpu_device_get_product_number, NULL);
204 
205 /**
206  * DOC: serial_number
207  *
208  * The amdgpu driver provides a sysfs API for reporting the serial number
209  * for the device
210  * The file serial_number is used for this and returns the serial number
211  * as returned from the FRU.
212  * NOTE: This is only available for certain server cards
213  */
214 
215 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
216 		struct device_attribute *attr, char *buf)
217 {
218 	struct drm_device *ddev = dev_get_drvdata(dev);
219 	struct amdgpu_device *adev = drm_to_adev(ddev);
220 
221 	return sysfs_emit(buf, "%s\n", adev->serial);
222 }
223 
224 static DEVICE_ATTR(serial_number, S_IRUGO,
225 		amdgpu_device_get_serial_number, NULL);
226 
227 /**
228  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
229  *
230  * @dev: drm_device pointer
231  *
232  * Returns true if the device is a dGPU with ATPX power control,
233  * otherwise return false.
234  */
235 bool amdgpu_device_supports_px(struct drm_device *dev)
236 {
237 	struct amdgpu_device *adev = drm_to_adev(dev);
238 
239 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
240 		return true;
241 	return false;
242 }
243 
244 /**
245  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
246  *
247  * @dev: drm_device pointer
248  *
249  * Returns true if the device is a dGPU with ACPI power control,
250  * otherwise return false.
251  */
252 bool amdgpu_device_supports_boco(struct drm_device *dev)
253 {
254 	struct amdgpu_device *adev = drm_to_adev(dev);
255 
256 	if (adev->has_pr3 ||
257 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
258 		return true;
259 	return false;
260 }
261 
262 /**
263  * amdgpu_device_supports_baco - Does the device support BACO
264  *
265  * @dev: drm_device pointer
266  *
267  * Returns true if the device supports BACO,
268  * otherwise return false.
269  */
270 bool amdgpu_device_supports_baco(struct drm_device *dev)
271 {
272 	struct amdgpu_device *adev = drm_to_adev(dev);
273 
274 	return amdgpu_asic_supports_baco(adev);
275 }
276 
277 /**
278  * amdgpu_device_supports_smart_shift - Is the device dGPU with
279  * smart shift support
280  *
281  * @dev: drm_device pointer
282  *
283  * Returns true if the device is a dGPU with Smart Shift support,
284  * otherwise returns false.
285  */
286 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
287 {
288 	return (amdgpu_device_supports_boco(dev) &&
289 		amdgpu_acpi_is_power_shift_control_supported());
290 }
291 
292 /*
293  * VRAM access helper functions
294  */
295 
296 /**
297  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
298  *
299  * @adev: amdgpu_device pointer
300  * @pos: offset of the buffer in vram
301  * @buf: virtual address of the buffer in system memory
302  * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
303  * @write: true - write to vram, otherwise - read from vram
304  */
305 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
306 			     void *buf, size_t size, bool write)
307 {
308 	unsigned long flags;
309 	uint32_t hi = ~0, tmp = 0;
310 	uint32_t *data = buf;
311 	uint64_t last;
312 	int idx;
313 
314 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
315 		return;
316 
317 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
318 
319 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
320 	for (last = pos + size; pos < last; pos += 4) {
321 		tmp = pos >> 31;
322 
323 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
324 		if (tmp != hi) {
325 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
326 			hi = tmp;
327 		}
328 		if (write)
329 			WREG32_NO_KIQ(mmMM_DATA, *data++);
330 		else
331 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
332 	}
333 
334 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
335 	drm_dev_exit(idx);
336 }
337 
338 /**
339  * amdgpu_device_aper_access - access vram by vram aperture
340  *
341  * @adev: amdgpu_device pointer
342  * @pos: offset of the buffer in vram
343  * @buf: virtual address of the buffer in system memory
344  * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
345  * @write: true - write to vram, otherwise - read from vram
346  *
347  * Returns the number of bytes transferred.
348  */
349 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
350 				 void *buf, size_t size, bool write)
351 {
352 #ifdef CONFIG_64BIT
353 	void __iomem *addr;
354 	size_t count = 0;
355 	uint64_t last;
356 
357 	if (!adev->mman.aper_base_kaddr)
358 		return 0;
359 
360 	last = min(pos + size, adev->gmc.visible_vram_size);
361 	if (last > pos) {
362 		addr = adev->mman.aper_base_kaddr + pos;
363 		count = last - pos;
364 
365 		if (write) {
366 			memcpy_toio(addr, buf, count);
367 			mb();
368 			amdgpu_device_flush_hdp(adev, NULL);
369 		} else {
370 			amdgpu_device_invalidate_hdp(adev, NULL);
371 			mb();
372 			memcpy_fromio(buf, addr, count);
373 		}
374 
375 	}
376 
377 	return count;
378 #else
379 	return 0;
380 #endif
381 }
382 
383 /**
384  * amdgpu_device_vram_access - read/write a buffer in vram
385  *
386  * @adev: amdgpu_device pointer
387  * @pos: offset of the buffer in vram
388  * @buf: virtual address of the buffer in system memory
389  * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
390  * @write: true - write to vram, otherwise - read from vram
391  */
392 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
393 			       void *buf, size_t size, bool write)
394 {
395 	size_t count;
396 
397 	/* try using the VRAM aperture to access VRAM first */
398 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
399 	size -= count;
400 	if (size) {
401 		/* use MM access for the rest of the VRAM */
402 		pos += count;
403 		buf += count;
404 		amdgpu_device_mm_access(adev, pos, buf, size, write);
405 	}
406 }
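
/*
 * Usage sketch (illustrative only, not taken from this file): reading a single
 * dword back from an arbitrary, 4-byte aligned VRAM offset with the helper
 * above. The 0x1000 offset is a made-up example.
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0x1000, &val, sizeof(val), false);
 *	dev_info(adev->dev, "VRAM[0x1000] = 0x%08x\n", val);
 */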
407 
408 /*
409  * register access helper functions.
410  */
411 
412 /* Check if hw access should be skipped because of hotplug or device error */
413 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
414 {
415 	if (adev->no_hw_access)
416 		return true;
417 
418 #ifdef CONFIG_LOCKDEP
419 	/*
420 	 * This is a bit complicated to understand, so worth a comment. What we assert
421 	 * here is that the GPU reset is not running on another thread in parallel.
422 	 *
423 	 * For this we trylock the read side of the reset semaphore, if that succeeds
424  * we know that the reset is not running in parallel.
425 	 *
426 	 * If the trylock fails we assert that we are either already holding the read
427 	 * side of the lock or are the reset thread itself and hold the write side of
428 	 * the lock.
429 	 */
430 	if (in_task()) {
431 		if (down_read_trylock(&adev->reset_domain->sem))
432 			up_read(&adev->reset_domain->sem);
433 		else
434 			lockdep_assert_held(&adev->reset_domain->sem);
435 	}
436 #endif
437 	return false;
438 }
439 
440 /**
441  * amdgpu_device_rreg - read a memory mapped IO or indirect register
442  *
443  * @adev: amdgpu_device pointer
444  * @reg: dword aligned register offset
445  * @acc_flags: access flags which require special behavior
446  *
447  * Returns the 32 bit value from the offset specified.
448  */
449 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
450 			    uint32_t reg, uint32_t acc_flags)
451 {
452 	uint32_t ret;
453 
454 	if (amdgpu_device_skip_hw_access(adev))
455 		return 0;
456 
457 	if ((reg * 4) < adev->rmmio_size) {
458 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
459 		    amdgpu_sriov_runtime(adev) &&
460 		    down_read_trylock(&adev->reset_domain->sem)) {
461 			ret = amdgpu_kiq_rreg(adev, reg);
462 			up_read(&adev->reset_domain->sem);
463 		} else {
464 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
465 		}
466 	} else {
467 		ret = adev->pcie_rreg(adev, reg * 4);
468 	}
469 
470 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
471 
472 	return ret;
473 }
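
/*
 * Sketch of a typical call site (illustrative): most of the driver reaches
 * this helper indirectly through the RREG32()-style macros rather than calling
 * it directly; a direct call would look roughly like
 *
 *	val = amdgpu_device_rreg(adev, mmSOME_REG, 0);
 *	val = amdgpu_device_rreg(adev, mmSOME_REG, AMDGPU_REGS_NO_KIQ);
 *
 * where mmSOME_REG stands in for a real dword register offset and
 * AMDGPU_REGS_NO_KIQ skips the KIQ path even when running under SR-IOV.
 */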
474 
475 /*
476  * MMIO register read with byte offset helper function
477  * @offset: byte offset from MMIO start
478  *
479 */
480 
481 /**
482  * amdgpu_mm_rreg8 - read a memory mapped IO register
483  *
484  * @adev: amdgpu_device pointer
485  * @offset: byte aligned register offset
486  *
487  * Returns the 8 bit value from the offset specified.
488  */
489 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
490 {
491 	if (amdgpu_device_skip_hw_access(adev))
492 		return 0;
493 
494 	if (offset < adev->rmmio_size)
495 		return (readb(adev->rmmio + offset));
496 	BUG();
497 }
498 
499 /*
500  * MMIO register write with byte offset helper function
501  * @offset: byte offset from MMIO start
502  * @value: the value to be written to the register
503  *
504 */
505 /**
506  * amdgpu_mm_wreg8 - write a memory mapped IO register
507  *
508  * @adev: amdgpu_device pointer
509  * @offset: byte aligned register offset
510  * @value: 8 bit value to write
511  *
512  * Writes the value specified to the offset specified.
513  */
514 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
515 {
516 	if (amdgpu_device_skip_hw_access(adev))
517 		return;
518 
519 	if (offset < adev->rmmio_size)
520 		writeb(value, adev->rmmio + offset);
521 	else
522 		BUG();
523 }
524 
525 /**
526  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
527  *
528  * @adev: amdgpu_device pointer
529  * @reg: dword aligned register offset
530  * @v: 32 bit value to write to the register
531  * @acc_flags: access flags which require special behavior
532  *
533  * Writes the value specified to the offset specified.
534  */
535 void amdgpu_device_wreg(struct amdgpu_device *adev,
536 			uint32_t reg, uint32_t v,
537 			uint32_t acc_flags)
538 {
539 	if (amdgpu_device_skip_hw_access(adev))
540 		return;
541 
542 	if ((reg * 4) < adev->rmmio_size) {
543 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
544 		    amdgpu_sriov_runtime(adev) &&
545 		    down_read_trylock(&adev->reset_domain->sem)) {
546 			amdgpu_kiq_wreg(adev, reg, v);
547 			up_read(&adev->reset_domain->sem);
548 		} else {
549 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
550 		}
551 	} else {
552 		adev->pcie_wreg(adev, reg * 4, v);
553 	}
554 
555 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
556 }
557 
558 /**
559  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
560  *
561  * @adev: amdgpu_device pointer
562  * @reg: mmio/rlc register
563  * @v: value to write
564  *
565  * this function is invoked only for the debugfs register access
566  */
567 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
568 			     uint32_t reg, uint32_t v)
569 {
570 	if (amdgpu_device_skip_hw_access(adev))
571 		return;
572 
573 	if (amdgpu_sriov_fullaccess(adev) &&
574 	    adev->gfx.rlc.funcs &&
575 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
576 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
577 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
578 	} else if ((reg * 4) >= adev->rmmio_size) {
579 		adev->pcie_wreg(adev, reg * 4, v);
580 	} else {
581 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
582 	}
583 }
584 
585 /**
586  * amdgpu_mm_rdoorbell - read a doorbell dword
587  *
588  * @adev: amdgpu_device pointer
589  * @index: doorbell index
590  *
591  * Returns the value in the doorbell aperture at the
592  * requested doorbell index (CIK).
593  */
594 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
595 {
596 	if (amdgpu_device_skip_hw_access(adev))
597 		return 0;
598 
599 	if (index < adev->doorbell.num_doorbells) {
600 		return readl(adev->doorbell.ptr + index);
601 	} else {
602 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
603 		return 0;
604 	}
605 }
606 
607 /**
608  * amdgpu_mm_wdoorbell - write a doorbell dword
609  *
610  * @adev: amdgpu_device pointer
611  * @index: doorbell index
612  * @v: value to write
613  *
614  * Writes @v to the doorbell aperture at the
615  * requested doorbell index (CIK).
616  */
617 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
618 {
619 	if (amdgpu_device_skip_hw_access(adev))
620 		return;
621 
622 	if (index < adev->doorbell.num_doorbells) {
623 		writel(v, adev->doorbell.ptr + index);
624 	} else {
625 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
626 	}
627 }
628 
629 /**
630  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
631  *
632  * @adev: amdgpu_device pointer
633  * @index: doorbell index
634  *
635  * Returns the value in the doorbell aperture at the
636  * requested doorbell index (VEGA10+).
637  */
638 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
639 {
640 	if (amdgpu_device_skip_hw_access(adev))
641 		return 0;
642 
643 	if (index < adev->doorbell.num_doorbells) {
644 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
645 	} else {
646 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
647 		return 0;
648 	}
649 }
650 
651 /**
652  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
653  *
654  * @adev: amdgpu_device pointer
655  * @index: doorbell index
656  * @v: value to write
657  *
658  * Writes @v to the doorbell aperture at the
659  * requested doorbell index (VEGA10+).
660  */
661 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
662 {
663 	if (amdgpu_device_skip_hw_access(adev))
664 		return;
665 
666 	if (index < adev->doorbell.num_doorbells) {
667 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
668 	} else {
669 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
670 	}
671 }
672 
673 /**
674  * amdgpu_device_indirect_rreg - read an indirect register
675  *
676  * @adev: amdgpu_device pointer
677  * @pcie_index: mmio register offset
678  * @pcie_data: mmio register offset
679  * @reg_addr: indirect register address to read from
680  *
681  * Returns the value of indirect register @reg_addr
682  */
683 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
684 				u32 pcie_index, u32 pcie_data,
685 				u32 reg_addr)
686 {
687 	unsigned long flags;
688 	u32 r;
689 	void __iomem *pcie_index_offset;
690 	void __iomem *pcie_data_offset;
691 
692 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
693 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
694 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
695 
696 	writel(reg_addr, pcie_index_offset);
697 	readl(pcie_index_offset);
698 	r = readl(pcie_data_offset);
699 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
700 
701 	return r;
702 }
703 
704 /**
705  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
706  *
707  * @adev: amdgpu_device pointer
708  * @pcie_index: mmio register offset
709  * @pcie_data: mmio register offset
710  * @reg_addr: indirect register address to read from
711  *
712  * Returns the value of indirect register @reg_addr
713  */
714 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
715 				  u32 pcie_index, u32 pcie_data,
716 				  u32 reg_addr)
717 {
718 	unsigned long flags;
719 	u64 r;
720 	void __iomem *pcie_index_offset;
721 	void __iomem *pcie_data_offset;
722 
723 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
724 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
725 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
726 
727 	/* read low 32 bits */
728 	writel(reg_addr, pcie_index_offset);
729 	readl(pcie_index_offset);
730 	r = readl(pcie_data_offset);
731 	/* read high 32 bits */
732 	writel(reg_addr + 4, pcie_index_offset);
733 	readl(pcie_index_offset);
734 	r |= ((u64)readl(pcie_data_offset) << 32);
735 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
736 
737 	return r;
738 }
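
/*
 * Sketch (illustrative, assuming an nbio callback layout like the soc15-era
 * code): ASIC files typically wrap these indirect helpers to implement the
 * adev->pcie_rreg64/pcie_wreg64 callbacks, roughly as
 *
 *	static u64 example_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
 *	{
 *		u32 address, data;
 *
 *		address = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg64(adev, address, data, reg);
 *	}
 *
 * example_pcie_rreg64() is a placeholder name, not a function in this file.
 */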
739 
740 /**
741  * amdgpu_device_indirect_wreg - write an indirect register address
742  *
743  * @adev: amdgpu_device pointer
744  * @pcie_index: mmio register offset
745  * @pcie_data: mmio register offset
746  * @reg_addr: indirect register offset
747  * @reg_data: indirect register data
748  *
749  */
750 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
751 				 u32 pcie_index, u32 pcie_data,
752 				 u32 reg_addr, u32 reg_data)
753 {
754 	unsigned long flags;
755 	void __iomem *pcie_index_offset;
756 	void __iomem *pcie_data_offset;
757 
758 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
759 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
760 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
761 
762 	writel(reg_addr, pcie_index_offset);
763 	readl(pcie_index_offset);
764 	writel(reg_data, pcie_data_offset);
765 	readl(pcie_data_offset);
766 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
767 }
768 
769 /**
770  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
771  *
772  * @adev: amdgpu_device pointer
773  * @pcie_index: mmio register offset
774  * @pcie_data: mmio register offset
775  * @reg_addr: indirect register offset
776  * @reg_data: indirect register data
777  *
778  */
779 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
780 				   u32 pcie_index, u32 pcie_data,
781 				   u32 reg_addr, u64 reg_data)
782 {
783 	unsigned long flags;
784 	void __iomem *pcie_index_offset;
785 	void __iomem *pcie_data_offset;
786 
787 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
788 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
789 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
790 
791 	/* write low 32 bits */
792 	writel(reg_addr, pcie_index_offset);
793 	readl(pcie_index_offset);
794 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
795 	readl(pcie_data_offset);
796 	/* write high 32 bits */
797 	writel(reg_addr + 4, pcie_index_offset);
798 	readl(pcie_index_offset);
799 	writel((u32)(reg_data >> 32), pcie_data_offset);
800 	readl(pcie_data_offset);
801 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
802 }
803 
804 /**
805  * amdgpu_invalid_rreg - dummy reg read function
806  *
807  * @adev: amdgpu_device pointer
808  * @reg: offset of register
809  *
810  * Dummy register read function.  Used for register blocks
811  * that certain asics don't have (all asics).
812  * Returns the value in the register.
813  */
814 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
815 {
816 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
817 	BUG();
818 	return 0;
819 }
820 
821 /**
822  * amdgpu_invalid_wreg - dummy reg write function
823  *
824  * @adev: amdgpu_device pointer
825  * @reg: offset of register
826  * @v: value to write to the register
827  *
828  * Dummy register write function.  Used for register blocks
829  * that certain asics don't have (all asics).
830  */
831 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
832 {
833 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
834 		  reg, v);
835 	BUG();
836 }
837 
838 /**
839  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
840  *
841  * @adev: amdgpu_device pointer
842  * @reg: offset of register
843  *
844  * Dummy register read function.  Used for register blocks
845  * that certain asics don't have (all asics).
846  * Returns the value in the register.
847  */
848 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
849 {
850 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
851 	BUG();
852 	return 0;
853 }
854 
855 /**
856  * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
857  *
858  * @adev: amdgpu_device pointer
859  * @reg: offset of register
860  * @v: value to write to the register
861  *
862  * Dummy register write function.  Used for register blocks
863  * that certain asics don't have (all asics).
864  */
865 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
866 {
867 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
868 		  reg, v);
869 	BUG();
870 }
871 
872 /**
873  * amdgpu_block_invalid_rreg - dummy reg read function
874  *
875  * @adev: amdgpu_device pointer
876  * @block: offset of instance
877  * @reg: offset of register
878  *
879  * Dummy register read function.  Used for register blocks
880  * that certain asics don't have (all asics).
881  * Returns the value in the register.
882  */
883 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
884 					  uint32_t block, uint32_t reg)
885 {
886 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
887 		  reg, block);
888 	BUG();
889 	return 0;
890 }
891 
892 /**
893  * amdgpu_block_invalid_wreg - dummy reg write function
894  *
895  * @adev: amdgpu_device pointer
896  * @block: offset of instance
897  * @reg: offset of register
898  * @v: value to write to the register
899  *
900  * Dummy register write function.  Used for register blocks
901  * that certain asics don't have (all asics).
902  */
903 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
904 				      uint32_t block,
905 				      uint32_t reg, uint32_t v)
906 {
907 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
908 		  reg, block, v);
909 	BUG();
910 }
911 
912 /**
913  * amdgpu_device_asic_init - Wrapper for atom asic_init
914  *
915  * @adev: amdgpu_device pointer
916  *
917  * Does any asic specific work and then calls atom asic init.
918  */
919 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
920 {
921 	amdgpu_asic_pre_asic_init(adev);
922 
923 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
924 		return amdgpu_atomfirmware_asic_init(adev, true);
925 	else
926 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
927 }
928 
929 /**
930  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
931  *
932  * @adev: amdgpu_device pointer
933  *
934  * Allocates a scratch page of VRAM for use by various things in the
935  * driver.
936  */
937 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
938 {
939 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
940 				       AMDGPU_GEM_DOMAIN_VRAM |
941 				       AMDGPU_GEM_DOMAIN_GTT,
942 				       &adev->mem_scratch.robj,
943 				       &adev->mem_scratch.gpu_addr,
944 				       (void **)&adev->mem_scratch.ptr);
945 }
946 
947 /**
948  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
949  *
950  * @adev: amdgpu_device pointer
951  *
952  * Frees the VRAM scratch page.
953  */
954 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
955 {
956 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
957 }
958 
959 /**
960  * amdgpu_device_program_register_sequence - program an array of registers.
961  *
962  * @adev: amdgpu_device pointer
963  * @registers: pointer to the register array
964  * @array_size: size of the register array
965  *
966  * Programs an array of registers with AND and OR masks.
967  * This is a helper for setting golden registers.
968  */
969 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
970 					     const u32 *registers,
971 					     const u32 array_size)
972 {
973 	u32 tmp, reg, and_mask, or_mask;
974 	int i;
975 
976 	if (array_size % 3)
977 		return;
978 
979 	for (i = 0; i < array_size; i += 3) {
980 		reg = registers[i + 0];
981 		and_mask = registers[i + 1];
982 		or_mask = registers[i + 2];
983 
984 		if (and_mask == 0xffffffff) {
985 			tmp = or_mask;
986 		} else {
987 			tmp = RREG32(reg);
988 			tmp &= ~and_mask;
989 			if (adev->family >= AMDGPU_FAMILY_AI)
990 				tmp |= (or_mask & and_mask);
991 			else
992 				tmp |= or_mask;
993 		}
994 		WREG32(reg, tmp);
995 	}
996 }
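
/*
 * Illustrative example of the expected layout (the register names and values
 * below are placeholders, not real golden settings): the array is consumed as
 * {offset, AND mask, OR mask} triplets, where an AND mask of 0xffffffff means
 * the OR mask is written verbatim.
 *
 *	static const u32 example_golden_settings[] = {
 *		mmSOME_REG_A, 0xffffffff, 0x00000100,
 *		mmSOME_REG_B, 0x0000000f, 0x00000002,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */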
997 
998 /**
999  * amdgpu_device_pci_config_reset - reset the GPU
1000  *
1001  * @adev: amdgpu_device pointer
1002  *
1003  * Resets the GPU using the pci config reset sequence.
1004  * Only applicable to asics prior to vega10.
1005  */
1006 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1007 {
1008 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1009 }
1010 
1011 /**
1012  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1013  *
1014  * @adev: amdgpu_device pointer
1015  *
1016  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1017  */
1018 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1019 {
1020 	return pci_reset_function(adev->pdev);
1021 }
1022 
1023 /*
1024  * GPU doorbell aperture helpers function.
1025  */
1026 /**
1027  * amdgpu_device_doorbell_init - Init doorbell driver information.
1028  *
1029  * @adev: amdgpu_device pointer
1030  *
1031  * Init doorbell driver information (CIK)
1032  * Returns 0 on success, error on failure.
1033  */
1034 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1035 {
1036 
1037 	/* No doorbell on SI hardware generation */
1038 	if (adev->asic_type < CHIP_BONAIRE) {
1039 		adev->doorbell.base = 0;
1040 		adev->doorbell.size = 0;
1041 		adev->doorbell.num_doorbells = 0;
1042 		adev->doorbell.ptr = NULL;
1043 		return 0;
1044 	}
1045 
1046 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1047 		return -EINVAL;
1048 
1049 	amdgpu_asic_init_doorbell_index(adev);
1050 
1051 	/* doorbell bar mapping */
1052 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1053 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1054 
1055 	if (adev->enable_mes) {
1056 		adev->doorbell.num_doorbells =
1057 			adev->doorbell.size / sizeof(u32);
1058 	} else {
1059 		adev->doorbell.num_doorbells =
1060 			min_t(u32, adev->doorbell.size / sizeof(u32),
1061 			      adev->doorbell_index.max_assignment+1);
1062 		if (adev->doorbell.num_doorbells == 0)
1063 			return -EINVAL;
1064 
1065 		/* For Vega, reserve and map two pages on doorbell BAR since SDMA
1066 		 * paging queue doorbell use the second page. The
1067 		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1068 		 * doorbells are in the first page. So with the paging queue enabled,
1069 		 * the max num_doorbells needs one extra page (0x400 in dwords).
1070 		 */
1071 		if (adev->asic_type >= CHIP_VEGA10)
1072 			adev->doorbell.num_doorbells += 0x400;
1073 	}
1074 
1075 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
1076 				     adev->doorbell.num_doorbells *
1077 				     sizeof(u32));
1078 	if (adev->doorbell.ptr == NULL)
1079 		return -ENOMEM;
1080 
1081 	return 0;
1082 }
1083 
1084 /**
1085  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1086  *
1087  * @adev: amdgpu_device pointer
1088  *
1089  * Tear down doorbell driver information (CIK)
1090  */
1091 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1092 {
1093 	iounmap(adev->doorbell.ptr);
1094 	adev->doorbell.ptr = NULL;
1095 }
1096 
1097 
1098 
1099 /*
1100  * amdgpu_device_wb_*()
1101  * Writeback is the method by which the GPU updates special pages in memory
1102  * with the status of certain GPU events (fences, ring pointers, etc.).
1103  */
1104 
1105 /**
1106  * amdgpu_device_wb_fini - Disable Writeback and free memory
1107  *
1108  * @adev: amdgpu_device pointer
1109  *
1110  * Disables Writeback and frees the Writeback memory (all asics).
1111  * Used at driver shutdown.
1112  */
1113 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1114 {
1115 	if (adev->wb.wb_obj) {
1116 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1117 				      &adev->wb.gpu_addr,
1118 				      (void **)&adev->wb.wb);
1119 		adev->wb.wb_obj = NULL;
1120 	}
1121 }
1122 
1123 /**
1124  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1125  *
1126  * @adev: amdgpu_device pointer
1127  *
1128  * Initializes writeback and allocates writeback memory (all asics).
1129  * Used at driver startup.
1130  * Returns 0 on success or a negative error code on failure.
1131  */
1132 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1133 {
1134 	int r;
1135 
1136 	if (adev->wb.wb_obj == NULL) {
1137 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1138 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1139 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1140 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1141 					    (void **)&adev->wb.wb);
1142 		if (r) {
1143 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1144 			return r;
1145 		}
1146 
1147 		adev->wb.num_wb = AMDGPU_MAX_WB;
1148 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1149 
1150 		/* clear wb memory */
1151 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1152 	}
1153 
1154 	return 0;
1155 }
1156 
1157 /**
1158  * amdgpu_device_wb_get - Allocate a wb entry
1159  *
1160  * @adev: amdgpu_device pointer
1161  * @wb: wb index
1162  *
1163  * Allocate a wb slot for use by the driver (all asics).
1164  * Returns 0 on success or -EINVAL on failure.
1165  */
1166 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1167 {
1168 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1169 
1170 	if (offset < adev->wb.num_wb) {
1171 		__set_bit(offset, adev->wb.used);
1172 		*wb = offset << 3; /* convert to dw offset */
1173 		return 0;
1174 	} else {
1175 		return -EINVAL;
1176 	}
1177 }
1178 
1179 /**
1180  * amdgpu_device_wb_free - Free a wb entry
1181  *
1182  * @adev: amdgpu_device pointer
1183  * @wb: wb index
1184  *
1185  * Free a wb slot allocated for use by the driver (all asics)
1186  */
1187 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1188 {
1189 	wb >>= 3;
1190 	if (wb < adev->wb.num_wb)
1191 		__clear_bit(wb, adev->wb.used);
1192 }
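
/*
 * Usage sketch (illustrative): a ring or IP block typically grabs a slot and
 * derives both the CPU and GPU addresses from the returned dword index,
 * roughly as
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile u32 *cpu_addr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *		... use the slot, then release it ...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */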
1193 
1194 /**
1195  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1196  *
1197  * @adev: amdgpu_device pointer
1198  *
1199  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1200  * to fail, but if any of the BARs is not accessible after the resize we abort
1201  * driver loading by returning -ENODEV.
1202  */
1203 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1204 {
1205 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1206 	struct pci_bus *root;
1207 	struct resource *res;
1208 	unsigned i;
1209 	u16 cmd;
1210 	int r;
1211 
1212 	/* Bypass for VF */
1213 	if (amdgpu_sriov_vf(adev))
1214 		return 0;
1215 
1216 	/* skip if the bios has already enabled large BAR */
1217 	if (adev->gmc.real_vram_size &&
1218 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1219 		return 0;
1220 
1221 	/* Check if the root BUS has 64bit memory resources */
1222 	root = adev->pdev->bus;
1223 	while (root->parent)
1224 		root = root->parent;
1225 
1226 	pci_bus_for_each_resource(root, res, i) {
1227 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1228 		    res->start > 0x100000000ull)
1229 			break;
1230 	}
1231 
1232 	/* Trying to resize is pointless without a root hub window above 4GB */
1233 	if (!res)
1234 		return 0;
1235 
1236 	/* Limit the BAR size to what is available */
1237 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1238 			rbar_size);
1239 
1240 	/* Disable memory decoding while we change the BAR addresses and size */
1241 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1242 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1243 			      cmd & ~PCI_COMMAND_MEMORY);
1244 
1245 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1246 	amdgpu_device_doorbell_fini(adev);
1247 	if (adev->asic_type >= CHIP_BONAIRE)
1248 		pci_release_resource(adev->pdev, 2);
1249 
1250 	pci_release_resource(adev->pdev, 0);
1251 
1252 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1253 	if (r == -ENOSPC)
1254 		DRM_INFO("Not enough PCI address space for a large BAR.");
1255 	else if (r && r != -ENOTSUPP)
1256 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1257 
1258 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1259 
1260 	/* When the doorbell or fb BAR isn't available we have no chance of
1261 	 * using the device.
1262 	 */
1263 	r = amdgpu_device_doorbell_init(adev);
1264 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1265 		return -ENODEV;
1266 
1267 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1268 
1269 	return 0;
1270 }
1271 
1272 /*
1273  * GPU helpers function.
1274  */
1275 /**
1276  * amdgpu_device_need_post - check if the hw need post or not
1277  *
1278  * @adev: amdgpu_device pointer
1279  *
1280  * Check if the asic has been initialized (all asics) at driver startup,
1281  * or if a post is needed because a hw reset was performed.
1282  * Returns true if a post is needed, false if not.
1283  */
1284 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1285 {
1286 	uint32_t reg;
1287 
1288 	if (amdgpu_sriov_vf(adev))
1289 		return false;
1290 
1291 	if (amdgpu_passthrough(adev)) {
1292 		/* for FIJI: In the whole GPU pass-through virtualization case, after VM reboot
1293 		 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU
1294 		 * hangs. SMC firmware versions above 22.15 don't have this flaw, so we force
1295 		 * vPost to be executed for SMC versions below 22.15
1296 		 */
1297 		if (adev->asic_type == CHIP_FIJI) {
1298 			int err;
1299 			uint32_t fw_ver;
1300 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1301 			/* force vPost if an error occurred */
1302 			if (err)
1303 				return true;
1304 
1305 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1306 			if (fw_ver < 0x00160e00)
1307 				return true;
1308 		}
1309 	}
1310 
1311 	/* Don't post if we need to reset whole hive on init */
1312 	if (adev->gmc.xgmi.pending_reset)
1313 		return false;
1314 
1315 	if (adev->has_hw_reset) {
1316 		adev->has_hw_reset = false;
1317 		return true;
1318 	}
1319 
1320 	/* bios scratch used on CIK+ */
1321 	if (adev->asic_type >= CHIP_BONAIRE)
1322 		return amdgpu_atombios_scratch_need_asic_init(adev);
1323 
1324 	/* check MEM_SIZE for older asics */
1325 	reg = amdgpu_asic_get_config_memsize(adev);
1326 
1327 	if ((reg != 0) && (reg != 0xffffffff))
1328 		return false;
1329 
1330 	return true;
1331 }
1332 
1333 /**
1334  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1335  *
1336  * @adev: amdgpu_device pointer
1337  *
1338  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1339  * be set for this device.
1340  *
1341  * Returns true if it should be used or false if not.
1342  */
1343 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1344 {
1345 	switch (amdgpu_aspm) {
1346 	case -1:
1347 		break;
1348 	case 0:
1349 		return false;
1350 	case 1:
1351 		return true;
1352 	default:
1353 		return false;
1354 	}
1355 	return pcie_aspm_enabled(adev->pdev);
1356 }
1357 
1358 /* if we get transitioned to only one device, take VGA back */
1359 /**
1360  * amdgpu_device_vga_set_decode - enable/disable vga decode
1361  *
1362  * @pdev: PCI device pointer
1363  * @state: enable/disable vga decode
1364  *
1365  * Enable/disable vga decode (all asics).
1366  * Returns VGA resource flags.
1367  */
1368 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1369 		bool state)
1370 {
1371 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1372 	amdgpu_asic_set_vga_state(adev, state);
1373 	if (state)
1374 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1375 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1376 	else
1377 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1378 }
1379 
1380 /**
1381  * amdgpu_device_check_block_size - validate the vm block size
1382  *
1383  * @adev: amdgpu_device pointer
1384  *
1385  * Validates the vm block size specified via module parameter.
1386  * The vm block size defines the number of bits in the page table versus the
1387  * page directory; a page is 4KB, so we have a 12 bit offset, a minimum of 9 bits
1388  * in the page table, and the remaining bits in the page directory.
1389  */
1390 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1391 {
1392 	/* defines number of bits in page table versus page directory,
1393 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1394 	 * page table and the remaining bits are in the page directory */
1395 	if (amdgpu_vm_block_size == -1)
1396 		return;
1397 
1398 	if (amdgpu_vm_block_size < 9) {
1399 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1400 			 amdgpu_vm_block_size);
1401 		amdgpu_vm_block_size = -1;
1402 	}
1403 }
1404 
1405 /**
1406  * amdgpu_device_check_vm_size - validate the vm size
1407  *
1408  * @adev: amdgpu_device pointer
1409  *
1410  * Validates the vm size in GB specified via module parameter.
1411  * The VM size is the size of the GPU virtual memory space in GB.
1412  */
1413 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1414 {
1415 	/* no need to check the default value */
1416 	if (amdgpu_vm_size == -1)
1417 		return;
1418 
1419 	if (amdgpu_vm_size < 1) {
1420 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1421 			 amdgpu_vm_size);
1422 		amdgpu_vm_size = -1;
1423 	}
1424 }
1425 
1426 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1427 {
1428 	struct sysinfo si;
1429 	bool is_os_64 = (sizeof(void *) == 8);
1430 	uint64_t total_memory;
1431 	uint64_t dram_size_seven_GB = 0x1B8000000;
1432 	uint64_t dram_size_three_GB = 0xB8000000;
1433 
1434 	if (amdgpu_smu_memory_pool_size == 0)
1435 		return;
1436 
1437 	if (!is_os_64) {
1438 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1439 		goto def_value;
1440 	}
1441 	si_meminfo(&si);
1442 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1443 
1444 	if ((amdgpu_smu_memory_pool_size == 1) ||
1445 		(amdgpu_smu_memory_pool_size == 2)) {
1446 		if (total_memory < dram_size_three_GB)
1447 			goto def_value1;
1448 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1449 		(amdgpu_smu_memory_pool_size == 8)) {
1450 		if (total_memory < dram_size_seven_GB)
1451 			goto def_value1;
1452 	} else {
1453 		DRM_WARN("Smu memory pool size not supported\n");
1454 		goto def_value;
1455 	}
1456 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1457 
1458 	return;
1459 
1460 def_value1:
1461 	DRM_WARN("Not enough system memory\n");
1462 def_value:
1463 	adev->pm.smu_prv_buffer_size = 0;
1464 }
1465 
1466 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1467 {
1468 	if (!(adev->flags & AMD_IS_APU) ||
1469 	    adev->asic_type < CHIP_RAVEN)
1470 		return 0;
1471 
1472 	switch (adev->asic_type) {
1473 	case CHIP_RAVEN:
1474 		if (adev->pdev->device == 0x15dd)
1475 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1476 		if (adev->pdev->device == 0x15d8)
1477 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1478 		break;
1479 	case CHIP_RENOIR:
1480 		if ((adev->pdev->device == 0x1636) ||
1481 		    (adev->pdev->device == 0x164c))
1482 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1483 		else
1484 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1485 		break;
1486 	case CHIP_VANGOGH:
1487 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1488 		break;
1489 	case CHIP_YELLOW_CARP:
1490 		break;
1491 	case CHIP_CYAN_SKILLFISH:
1492 		if ((adev->pdev->device == 0x13FE) ||
1493 		    (adev->pdev->device == 0x143F))
1494 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1495 		break;
1496 	default:
1497 		break;
1498 	}
1499 
1500 	return 0;
1501 }
1502 
1503 /**
1504  * amdgpu_device_check_arguments - validate module params
1505  *
1506  * @adev: amdgpu_device pointer
1507  *
1508  * Validates certain module parameters and updates
1509  * the associated values used by the driver (all asics).
1510  */
1511 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1512 {
1513 	if (amdgpu_sched_jobs < 4) {
1514 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1515 			 amdgpu_sched_jobs);
1516 		amdgpu_sched_jobs = 4;
1517 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1518 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1519 			 amdgpu_sched_jobs);
1520 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1521 	}
1522 
1523 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1524 		/* gart size must be greater than or equal to 32M */
1525 		dev_warn(adev->dev, "gart size (%d) too small\n",
1526 			 amdgpu_gart_size);
1527 		amdgpu_gart_size = -1;
1528 	}
1529 
1530 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1531 		/* gtt size must be greater than or equal to 32M */
1532 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1533 				 amdgpu_gtt_size);
1534 		amdgpu_gtt_size = -1;
1535 	}
1536 
1537 	/* valid range is between 4 and 9 inclusive */
1538 	if (amdgpu_vm_fragment_size != -1 &&
1539 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1540 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1541 		amdgpu_vm_fragment_size = -1;
1542 	}
1543 
1544 	if (amdgpu_sched_hw_submission < 2) {
1545 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1546 			 amdgpu_sched_hw_submission);
1547 		amdgpu_sched_hw_submission = 2;
1548 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1549 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1550 			 amdgpu_sched_hw_submission);
1551 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1552 	}
1553 
1554 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1555 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1556 		amdgpu_reset_method = -1;
1557 	}
1558 
1559 	amdgpu_device_check_smu_prv_buffer_size(adev);
1560 
1561 	amdgpu_device_check_vm_size(adev);
1562 
1563 	amdgpu_device_check_block_size(adev);
1564 
1565 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1566 
1567 	return 0;
1568 }
1569 
1570 /**
1571  * amdgpu_switcheroo_set_state - set switcheroo state
1572  *
1573  * @pdev: pci dev pointer
1574  * @state: vga_switcheroo state
1575  *
1576  * Callback for the switcheroo driver.  Suspends or resumes
1577  * the asics before or after it is powered up using ACPI methods.
1578  */
1579 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1580 					enum vga_switcheroo_state state)
1581 {
1582 	struct drm_device *dev = pci_get_drvdata(pdev);
1583 	int r;
1584 
1585 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1586 		return;
1587 
1588 	if (state == VGA_SWITCHEROO_ON) {
1589 		pr_info("switched on\n");
1590 		/* don't suspend or resume card normally */
1591 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1592 
1593 		pci_set_power_state(pdev, PCI_D0);
1594 		amdgpu_device_load_pci_state(pdev);
1595 		r = pci_enable_device(pdev);
1596 		if (r)
1597 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1598 		amdgpu_device_resume(dev, true);
1599 
1600 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1601 	} else {
1602 		pr_info("switched off\n");
1603 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1604 		amdgpu_device_suspend(dev, true);
1605 		amdgpu_device_cache_pci_state(pdev);
1606 		/* Shut down the device */
1607 		pci_disable_device(pdev);
1608 		pci_set_power_state(pdev, PCI_D3cold);
1609 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1610 	}
1611 }
1612 
1613 /**
1614  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1615  *
1616  * @pdev: pci dev pointer
1617  *
1618  * Callback for the switcheroo driver.  Check if the switcheroo
1619  * state can be changed.
1620  * Returns true if the state can be changed, false if not.
1621  */
1622 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1623 {
1624 	struct drm_device *dev = pci_get_drvdata(pdev);
1625 
1626 	/*
1627 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1628 	* locking inversion with the driver load path. And the access here is
1629 	* completely racy anyway. So don't bother with locking for now.
1630 	*/
1631 	return atomic_read(&dev->open_count) == 0;
1632 }
1633 
1634 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1635 	.set_gpu_state = amdgpu_switcheroo_set_state,
1636 	.reprobe = NULL,
1637 	.can_switch = amdgpu_switcheroo_can_switch,
1638 };
1639 
1640 /**
1641  * amdgpu_device_ip_set_clockgating_state - set the CG state
1642  *
1643  * @dev: amdgpu_device pointer
1644  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1645  * @state: clockgating state (gate or ungate)
1646  *
1647  * Sets the requested clockgating state for all instances of
1648  * the hardware IP specified.
1649  * Returns the error code from the last instance.
1650  */
1651 int amdgpu_device_ip_set_clockgating_state(void *dev,
1652 					   enum amd_ip_block_type block_type,
1653 					   enum amd_clockgating_state state)
1654 {
1655 	struct amdgpu_device *adev = dev;
1656 	int i, r = 0;
1657 
1658 	for (i = 0; i < adev->num_ip_blocks; i++) {
1659 		if (!adev->ip_blocks[i].status.valid)
1660 			continue;
1661 		if (adev->ip_blocks[i].version->type != block_type)
1662 			continue;
1663 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1664 			continue;
1665 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1666 			(void *)adev, state);
1667 		if (r)
1668 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1669 				  adev->ip_blocks[i].version->funcs->name, r);
1670 	}
1671 	return r;
1672 }
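
/*
 * Sketch of a typical call (illustrative, not taken from this file): gating
 * the clocks of every GFX instance from elsewhere in the driver would look
 * roughly like
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 *	if (r)
 *		dev_warn(adev->dev, "failed to gate GFX clocks (%d)\n", r);
 */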
1673 
1674 /**
1675  * amdgpu_device_ip_set_powergating_state - set the PG state
1676  *
1677  * @dev: amdgpu_device pointer
1678  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1679  * @state: powergating state (gate or ungate)
1680  *
1681  * Sets the requested powergating state for all instances of
1682  * the hardware IP specified.
1683  * Returns the error code from the last instance.
1684  */
1685 int amdgpu_device_ip_set_powergating_state(void *dev,
1686 					   enum amd_ip_block_type block_type,
1687 					   enum amd_powergating_state state)
1688 {
1689 	struct amdgpu_device *adev = dev;
1690 	int i, r = 0;
1691 
1692 	for (i = 0; i < adev->num_ip_blocks; i++) {
1693 		if (!adev->ip_blocks[i].status.valid)
1694 			continue;
1695 		if (adev->ip_blocks[i].version->type != block_type)
1696 			continue;
1697 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1698 			continue;
1699 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1700 			(void *)adev, state);
1701 		if (r)
1702 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1703 				  adev->ip_blocks[i].version->funcs->name, r);
1704 	}
1705 	return r;
1706 }
1707 
1708 /**
1709  * amdgpu_device_ip_get_clockgating_state - get the CG state
1710  *
1711  * @adev: amdgpu_device pointer
1712  * @flags: clockgating feature flags
1713  *
1714  * Walks the list of IPs on the device and updates the clockgating
1715  * flags for each IP.
1716  * Updates @flags with the feature flags for each hardware IP where
1717  * clockgating is enabled.
1718  */
1719 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1720 					    u64 *flags)
1721 {
1722 	int i;
1723 
1724 	for (i = 0; i < adev->num_ip_blocks; i++) {
1725 		if (!adev->ip_blocks[i].status.valid)
1726 			continue;
1727 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1728 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1729 	}
1730 }
1731 
1732 /**
1733  * amdgpu_device_ip_wait_for_idle - wait for idle
1734  *
1735  * @adev: amdgpu_device pointer
1736  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1737  *
1738  * Waits for the requested hardware IP to be idle.
1739  * Returns 0 for success or a negative error code on failure.
1740  */
1741 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1742 				   enum amd_ip_block_type block_type)
1743 {
1744 	int i, r;
1745 
1746 	for (i = 0; i < adev->num_ip_blocks; i++) {
1747 		if (!adev->ip_blocks[i].status.valid)
1748 			continue;
1749 		if (adev->ip_blocks[i].version->type == block_type) {
1750 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1751 			if (r)
1752 				return r;
1753 			break;
1754 		}
1755 	}
1756 	return 0;
1757 
1758 }
1759 
1760 /**
1761  * amdgpu_device_ip_is_idle - is the hardware IP idle
1762  *
1763  * @adev: amdgpu_device pointer
1764  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1765  *
1766  * Check if the hardware IP is idle or not.
1767  * Returns true if the IP is idle, false if not.
1768  */
1769 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1770 			      enum amd_ip_block_type block_type)
1771 {
1772 	int i;
1773 
1774 	for (i = 0; i < adev->num_ip_blocks; i++) {
1775 		if (!adev->ip_blocks[i].status.valid)
1776 			continue;
1777 		if (adev->ip_blocks[i].version->type == block_type)
1778 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1779 	}
1780 	return true;
1781 
1782 }
1783 
1784 /**
1785  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1786  *
1787  * @adev: amdgpu_device pointer
1788  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1789  *
1790  * Returns a pointer to the hardware IP block structure
1791  * if it exists for the asic, otherwise NULL.
1792  */
1793 struct amdgpu_ip_block *
1794 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1795 			      enum amd_ip_block_type type)
1796 {
1797 	int i;
1798 
1799 	for (i = 0; i < adev->num_ip_blocks; i++)
1800 		if (adev->ip_blocks[i].version->type == type)
1801 			return &adev->ip_blocks[i];
1802 
1803 	return NULL;
1804 }
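
/*
 * Usage sketch (illustrative): callers usually just probe for a block and
 * inspect its version information, e.g.
 *
 *	struct amdgpu_ip_block *ip =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 *
 *	if (ip)
 *		DRM_INFO("GMC IP v%u.%u.%u\n", ip->version->major,
 *			 ip->version->minor, ip->version->rev);
 */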
1805 
1806 /**
1807  * amdgpu_device_ip_block_version_cmp
1808  *
1809  * @adev: amdgpu_device pointer
1810  * @type: enum amd_ip_block_type
1811  * @major: major version
1812  * @minor: minor version
1813  *
1814  * Returns 0 if the IP block's version is equal to or greater than the one specified,
1815  * or 1 if it is smaller or the ip_block doesn't exist.
1816  */
1817 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1818 				       enum amd_ip_block_type type,
1819 				       u32 major, u32 minor)
1820 {
1821 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1822 
1823 	if (ip_block && ((ip_block->version->major > major) ||
1824 			((ip_block->version->major == major) &&
1825 			(ip_block->version->minor >= minor))))
1826 		return 0;
1827 
1828 	return 1;
1829 }
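
/*
 * Illustrative check (hypothetical, not a real requirement in this file):
 * gating a feature on "GFX IP at least 8.1" could be written as
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1))
 *		enable_the_feature(adev);
 *
 * where enable_the_feature() is a placeholder.
 */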
1830 
1831 /**
1832  * amdgpu_device_ip_block_add
1833  *
1834  * @adev: amdgpu_device pointer
1835  * @ip_block_version: pointer to the IP to add
1836  *
1837  * Adds the IP block driver information to the collection of IPs
1838  * on the asic.
1839  */
1840 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1841 			       const struct amdgpu_ip_block_version *ip_block_version)
1842 {
1843 	if (!ip_block_version)
1844 		return -EINVAL;
1845 
1846 	switch (ip_block_version->type) {
1847 	case AMD_IP_BLOCK_TYPE_VCN:
1848 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1849 			return 0;
1850 		break;
1851 	case AMD_IP_BLOCK_TYPE_JPEG:
1852 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1853 			return 0;
1854 		break;
1855 	default:
1856 		break;
1857 	}
1858 
1859 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1860 		  ip_block_version->funcs->name);
1861 
1862 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1863 
1864 	return 0;
1865 }
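
/*
 * Illustrative sketch (simplified): an ASIC-specific *_set_ip_blocks() helper
 * registers its blocks in bring-up order with this function, roughly like the
 * VI path does:
 *
 *	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	if (r)
 *		return r;
 *	r = amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	if (r)
 *		return r;
 */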
1866 
1867 /**
1868  * amdgpu_device_enable_virtual_display - enable virtual display feature
1869  *
1870  * @adev: amdgpu_device pointer
1871  *
1872  * Enables the virtual display feature if the user has enabled it via
1873  * the module parameter virtual_display.  This feature provides virtual
1874  * display hardware on headless boards or in virtualized environments.
1875  * This function parses and validates the configuration string specified by
1876  * the user and configures the virtual display configuration (number of
1877  * virtual connectors, crtcs, etc.) specified.
1878  */
1879 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1880 {
1881 	adev->enable_virtual_display = false;
1882 
1883 	if (amdgpu_virtual_display) {
1884 		const char *pci_address_name = pci_name(adev->pdev);
1885 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1886 
1887 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1888 		pciaddstr_tmp = pciaddstr;
1889 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1890 			pciaddname = strsep(&pciaddname_tmp, ",");
1891 			if (!strcmp("all", pciaddname)
1892 			    || !strcmp(pci_address_name, pciaddname)) {
1893 				long num_crtc;
1894 				int res = -1;
1895 
1896 				adev->enable_virtual_display = true;
1897 
1898 				if (pciaddname_tmp)
1899 					res = kstrtol(pciaddname_tmp, 10,
1900 						      &num_crtc);
1901 
1902 				if (!res) {
1903 					if (num_crtc < 1)
1904 						num_crtc = 1;
1905 					if (num_crtc > 6)
1906 						num_crtc = 6;
1907 					adev->mode_info.num_crtc = num_crtc;
1908 				} else {
1909 					adev->mode_info.num_crtc = 1;
1910 				}
1911 				break;
1912 			}
1913 		}
1914 
1915 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1916 			 amdgpu_virtual_display, pci_address_name,
1917 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1918 
1919 		kfree(pciaddstr);
1920 	}
1921 }
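
/*
 * Illustrative examples of the virtual_display module parameter parsed above
 * (the PCI address is hypothetical):
 *
 *	modprobe amdgpu virtual_display=0000:26:00.0,2
 *
 * enables a virtual display with two CRTCs on that device only, while
 *
 *	modprobe amdgpu virtual_display=all,1
 *
 * enables a single virtual CRTC on every device bound to amdgpu.
 */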
1922 
1923 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1924 {
1925 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1926 		adev->mode_info.num_crtc = 1;
1927 		adev->enable_virtual_display = true;
1928 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1929 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1930 	}
1931 }
1932 
1933 /**
1934  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1935  *
1936  * @adev: amdgpu_device pointer
1937  *
1938  * Parses the asic configuration parameters specified in the gpu info
1939  * firmware and makes them available to the driver for use in configuring
1940  * the asic.
1941  * Returns 0 on success, -EINVAL on failure.
1942  */
1943 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1944 {
1945 	const char *chip_name;
1946 	char fw_name[40];
1947 	int err;
1948 	const struct gpu_info_firmware_header_v1_0 *hdr;
1949 
1950 	adev->firmware.gpu_info_fw = NULL;
1951 
1952 	if (adev->mman.discovery_bin) {
1953 		/*
1954 		 * FIXME: The bounding box is still needed by Navi12, so
1955 		 * temporarily read it from gpu_info firmware. Should be dropped
1956 		 * when DAL no longer needs it.
1957 		 */
1958 		if (adev->asic_type != CHIP_NAVI12)
1959 			return 0;
1960 	}
1961 
1962 	switch (adev->asic_type) {
1963 	default:
1964 		return 0;
1965 	case CHIP_VEGA10:
1966 		chip_name = "vega10";
1967 		break;
1968 	case CHIP_VEGA12:
1969 		chip_name = "vega12";
1970 		break;
1971 	case CHIP_RAVEN:
1972 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1973 			chip_name = "raven2";
1974 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1975 			chip_name = "picasso";
1976 		else
1977 			chip_name = "raven";
1978 		break;
1979 	case CHIP_ARCTURUS:
1980 		chip_name = "arcturus";
1981 		break;
1982 	case CHIP_NAVI12:
1983 		chip_name = "navi12";
1984 		break;
1985 	}
1986 
1987 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1988 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
1989 	if (err) {
1990 		dev_err(adev->dev,
1991 			"Failed to get gpu_info firmware \"%s\"\n",
1992 			fw_name);
1993 		goto out;
1994 	}
1995 
1996 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1997 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1998 
1999 	switch (hdr->version_major) {
2000 	case 1:
2001 	{
2002 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2003 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2004 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2005 
2006 		/*
2007 		 * Should be dropped when DAL no longer needs it.
2008 		 */
2009 		if (adev->asic_type == CHIP_NAVI12)
2010 			goto parse_soc_bounding_box;
2011 
2012 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2013 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2014 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2015 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2016 		adev->gfx.config.max_texture_channel_caches =
2017 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2018 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2019 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2020 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2021 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2022 		adev->gfx.config.double_offchip_lds_buf =
2023 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2024 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2025 		adev->gfx.cu_info.max_waves_per_simd =
2026 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2027 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2028 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2029 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2030 		if (hdr->version_minor >= 1) {
2031 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2032 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2033 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2034 			adev->gfx.config.num_sc_per_sh =
2035 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2036 			adev->gfx.config.num_packer_per_sc =
2037 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2038 		}
2039 
2040 parse_soc_bounding_box:
2041 		/*
2042 		 * soc bounding box info is not integrated in the discovery table,
2043 		 * so we always need to parse it from gpu info firmware if needed.
2044 		 */
2045 		if (hdr->version_minor == 2) {
2046 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2047 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2048 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2049 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2050 		}
2051 		break;
2052 	}
2053 	default:
2054 		dev_err(adev->dev,
2055 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2056 		err = -EINVAL;
2057 		goto out;
2058 	}
2059 out:
2060 	return err;
2061 }
2062 
2063 /**
2064  * amdgpu_device_ip_early_init - run early init for hardware IPs
2065  *
2066  * @adev: amdgpu_device pointer
2067  *
2068  * Early initialization pass for hardware IPs.  The hardware IPs that make
2069  * up each asic are discovered and each IP's early_init callback is run.  This
2070  * is the first stage in initializing the asic.
2071  * Returns 0 on success, negative error code on failure.
2072  */
2073 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2074 {
2075 	struct drm_device *dev = adev_to_drm(adev);
2076 	struct pci_dev *parent;
2077 	int i, r;
2078 	bool total;
2079 
2080 	amdgpu_device_enable_virtual_display(adev);
2081 
2082 	if (amdgpu_sriov_vf(adev)) {
2083 		r = amdgpu_virt_request_full_gpu(adev, true);
2084 		if (r)
2085 			return r;
2086 	}
2087 
2088 	switch (adev->asic_type) {
2089 #ifdef CONFIG_DRM_AMDGPU_SI
2090 	case CHIP_VERDE:
2091 	case CHIP_TAHITI:
2092 	case CHIP_PITCAIRN:
2093 	case CHIP_OLAND:
2094 	case CHIP_HAINAN:
2095 		adev->family = AMDGPU_FAMILY_SI;
2096 		r = si_set_ip_blocks(adev);
2097 		if (r)
2098 			return r;
2099 		break;
2100 #endif
2101 #ifdef CONFIG_DRM_AMDGPU_CIK
2102 	case CHIP_BONAIRE:
2103 	case CHIP_HAWAII:
2104 	case CHIP_KAVERI:
2105 	case CHIP_KABINI:
2106 	case CHIP_MULLINS:
2107 		if (adev->flags & AMD_IS_APU)
2108 			adev->family = AMDGPU_FAMILY_KV;
2109 		else
2110 			adev->family = AMDGPU_FAMILY_CI;
2111 
2112 		r = cik_set_ip_blocks(adev);
2113 		if (r)
2114 			return r;
2115 		break;
2116 #endif
2117 	case CHIP_TOPAZ:
2118 	case CHIP_TONGA:
2119 	case CHIP_FIJI:
2120 	case CHIP_POLARIS10:
2121 	case CHIP_POLARIS11:
2122 	case CHIP_POLARIS12:
2123 	case CHIP_VEGAM:
2124 	case CHIP_CARRIZO:
2125 	case CHIP_STONEY:
2126 		if (adev->flags & AMD_IS_APU)
2127 			adev->family = AMDGPU_FAMILY_CZ;
2128 		else
2129 			adev->family = AMDGPU_FAMILY_VI;
2130 
2131 		r = vi_set_ip_blocks(adev);
2132 		if (r)
2133 			return r;
2134 		break;
2135 	default:
2136 		r = amdgpu_discovery_set_ip_blocks(adev);
2137 		if (r)
2138 			return r;
2139 		break;
2140 	}
2141 
2142 	if (amdgpu_has_atpx() &&
2143 	    (amdgpu_is_atpx_hybrid() ||
2144 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2145 	    ((adev->flags & AMD_IS_APU) == 0) &&
2146 	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2147 		adev->flags |= AMD_IS_PX;
2148 
2149 	if (!(adev->flags & AMD_IS_APU)) {
2150 		parent = pci_upstream_bridge(adev->pdev);
2151 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2152 	}
2153 
2154 	amdgpu_amdkfd_device_probe(adev);
2155 
2156 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2157 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2158 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2159 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2160 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2161 
2162 	total = true;
2163 	for (i = 0; i < adev->num_ip_blocks; i++) {
2164 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2165 			DRM_ERROR("disabled ip block: %d <%s>\n",
2166 				  i, adev->ip_blocks[i].version->funcs->name);
2167 			adev->ip_blocks[i].status.valid = false;
2168 		} else {
2169 			if (adev->ip_blocks[i].version->funcs->early_init) {
2170 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2171 				if (r == -ENOENT) {
2172 					adev->ip_blocks[i].status.valid = false;
2173 				} else if (r) {
2174 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2175 						  adev->ip_blocks[i].version->funcs->name, r);
2176 					total = false;
2177 				} else {
2178 					adev->ip_blocks[i].status.valid = true;
2179 				}
2180 			} else {
2181 				adev->ip_blocks[i].status.valid = true;
2182 			}
2183 		}
2184 		/* get the vbios after the asic_funcs are set up */
2185 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2186 			r = amdgpu_device_parse_gpu_info_fw(adev);
2187 			if (r)
2188 				return r;
2189 
2190 			/* Read BIOS */
2191 			if (!amdgpu_get_bios(adev))
2192 				return -EINVAL;
2193 
2194 			r = amdgpu_atombios_init(adev);
2195 			if (r) {
2196 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2197 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2198 				return r;
2199 			}
2200 
2201 			/* get pf2vf msg info at the earliest time */
2202 			if (amdgpu_sriov_vf(adev))
2203 				amdgpu_virt_init_data_exchange(adev);
2204 
2205 		}
2206 	}
2207 	if (!total)
2208 		return -ENODEV;
2209 
2210 	adev->cg_flags &= amdgpu_cg_mask;
2211 	adev->pg_flags &= amdgpu_pg_mask;
2212 
2213 	return 0;
2214 }
2215 
2216 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2217 {
2218 	int i, r;
2219 
2220 	for (i = 0; i < adev->num_ip_blocks; i++) {
2221 		if (!adev->ip_blocks[i].status.sw)
2222 			continue;
2223 		if (adev->ip_blocks[i].status.hw)
2224 			continue;
2225 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2226 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2227 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2228 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2229 			if (r) {
2230 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2231 					  adev->ip_blocks[i].version->funcs->name, r);
2232 				return r;
2233 			}
2234 			adev->ip_blocks[i].status.hw = true;
2235 		}
2236 	}
2237 
2238 	return 0;
2239 }
2240 
2241 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2242 {
2243 	int i, r;
2244 
2245 	for (i = 0; i < adev->num_ip_blocks; i++) {
2246 		if (!adev->ip_blocks[i].status.sw)
2247 			continue;
2248 		if (adev->ip_blocks[i].status.hw)
2249 			continue;
2250 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2251 		if (r) {
2252 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2253 				  adev->ip_blocks[i].version->funcs->name, r);
2254 			return r;
2255 		}
2256 		adev->ip_blocks[i].status.hw = true;
2257 	}
2258 
2259 	return 0;
2260 }
2261 
2262 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2263 {
2264 	int r = 0;
2265 	int i;
2266 	uint32_t smu_version;
2267 
2268 	if (adev->asic_type >= CHIP_VEGA10) {
2269 		for (i = 0; i < adev->num_ip_blocks; i++) {
2270 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2271 				continue;
2272 
2273 			if (!adev->ip_blocks[i].status.sw)
2274 				continue;
2275 
2276 			/* no need to do the fw loading again if already done*/
2277 			if (adev->ip_blocks[i].status.hw == true)
2278 				break;
2279 
2280 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2281 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2282 				if (r) {
2283 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2284 							  adev->ip_blocks[i].version->funcs->name, r);
2285 					return r;
2286 				}
2287 			} else {
2288 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2289 				if (r) {
2290 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2291 							  adev->ip_blocks[i].version->funcs->name, r);
2292 					return r;
2293 				}
2294 			}
2295 
2296 			adev->ip_blocks[i].status.hw = true;
2297 			break;
2298 		}
2299 	}
2300 
2301 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2302 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2303 
2304 	return r;
2305 }
2306 
2307 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2308 {
2309 	long timeout;
2310 	int r, i;
2311 
2312 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2313 		struct amdgpu_ring *ring = adev->rings[i];
2314 
2315 		/* No need to setup the GPU scheduler for rings that don't need it */
2316 		if (!ring || ring->no_scheduler)
2317 			continue;
2318 
2319 		switch (ring->funcs->type) {
2320 		case AMDGPU_RING_TYPE_GFX:
2321 			timeout = adev->gfx_timeout;
2322 			break;
2323 		case AMDGPU_RING_TYPE_COMPUTE:
2324 			timeout = adev->compute_timeout;
2325 			break;
2326 		case AMDGPU_RING_TYPE_SDMA:
2327 			timeout = adev->sdma_timeout;
2328 			break;
2329 		default:
2330 			timeout = adev->video_timeout;
2331 			break;
2332 		}
2333 
2334 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2335 				   ring->num_hw_submission, amdgpu_job_hang_limit,
2336 				   timeout, adev->reset_domain->wq,
2337 				   ring->sched_score, ring->name,
2338 				   adev->dev);
2339 		if (r) {
2340 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2341 				  ring->name);
2342 			return r;
2343 		}
2344 	}
2345 
2346 	return 0;
2347 }
2348 
2349 
2350 /**
2351  * amdgpu_device_ip_init - run init for hardware IPs
2352  *
2353  * @adev: amdgpu_device pointer
2354  *
2355  * Main initialization pass for hardware IPs.  The list of all the hardware
2356  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2357  * are run.  sw_init initializes the software state associated with each IP
2358  * and hw_init initializes the hardware associated with each IP.
2359  * Returns 0 on success, negative error code on failure.
2360  */
2361 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2362 {
2363 	int i, r;
2364 
2365 	r = amdgpu_ras_init(adev);
2366 	if (r)
2367 		return r;
2368 
2369 	for (i = 0; i < adev->num_ip_blocks; i++) {
2370 		if (!adev->ip_blocks[i].status.valid)
2371 			continue;
2372 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2373 		if (r) {
2374 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2375 				  adev->ip_blocks[i].version->funcs->name, r);
2376 			goto init_failed;
2377 		}
2378 		adev->ip_blocks[i].status.sw = true;
2379 
2380 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2381 			/* need to do common hw init early so everything is set up for gmc */
2382 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2383 			if (r) {
2384 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2385 				goto init_failed;
2386 			}
2387 			adev->ip_blocks[i].status.hw = true;
2388 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2389 			/* need to do gmc hw init early so we can allocate gpu mem */
2390 			/* Try to reserve bad pages early */
2391 			if (amdgpu_sriov_vf(adev))
2392 				amdgpu_virt_exchange_data(adev);
2393 
2394 			r = amdgpu_device_mem_scratch_init(adev);
2395 			if (r) {
2396 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2397 				goto init_failed;
2398 			}
2399 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2400 			if (r) {
2401 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2402 				goto init_failed;
2403 			}
2404 			r = amdgpu_device_wb_init(adev);
2405 			if (r) {
2406 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2407 				goto init_failed;
2408 			}
2409 			adev->ip_blocks[i].status.hw = true;
2410 
2411 			/* right after GMC hw init, we create CSA */
2412 			if (amdgpu_mcbp) {
2413 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2414 							       AMDGPU_GEM_DOMAIN_VRAM |
2415 							       AMDGPU_GEM_DOMAIN_GTT,
2416 							       AMDGPU_CSA_SIZE);
2417 				if (r) {
2418 					DRM_ERROR("allocate CSA failed %d\n", r);
2419 					goto init_failed;
2420 				}
2421 			}
2422 		}
2423 	}
2424 
2425 	if (amdgpu_sriov_vf(adev))
2426 		amdgpu_virt_init_data_exchange(adev);
2427 
2428 	r = amdgpu_ib_pool_init(adev);
2429 	if (r) {
2430 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2431 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2432 		goto init_failed;
2433 	}
2434 
2435 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
2436 	if (r)
2437 		goto init_failed;
2438 
2439 	r = amdgpu_device_ip_hw_init_phase1(adev);
2440 	if (r)
2441 		goto init_failed;
2442 
2443 	r = amdgpu_device_fw_loading(adev);
2444 	if (r)
2445 		goto init_failed;
2446 
2447 	r = amdgpu_device_ip_hw_init_phase2(adev);
2448 	if (r)
2449 		goto init_failed;
2450 
2451 	/*
2452 	 * Retired pages will be loaded from eeprom and reserved here.
2453 	 * This should be called after amdgpu_device_ip_hw_init_phase2, since
2454 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2455 	 * functional for I2C communication, which is only true at this point.
2456 	 *
2457 	 * amdgpu_ras_recovery_init may fail, but the upper level only cares
2458 	 * about failures caused by a bad gpu situation and stops the amdgpu
2459 	 * init process accordingly.  For other failures it still releases all
2460 	 * the resources and prints an error message rather than returning a
2461 	 * negative value to the upper level.
2462 	 *
2463 	 * Note: theoretically, this should be called before all vram allocations
2464 	 * to protect retired pages from being reused.
2465 	 */
2466 	r = amdgpu_ras_recovery_init(adev);
2467 	if (r)
2468 		goto init_failed;
2469 
2470 	/*
2471 	 * In case of XGMI, grab an extra reference on the reset domain for this device
2472 	 */
2473 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2474 		if (amdgpu_xgmi_add_device(adev) == 0) {
2475 			if (!amdgpu_sriov_vf(adev)) {
2476 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2477 
2478 				if (WARN_ON(!hive)) {
2479 					r = -ENOENT;
2480 					goto init_failed;
2481 				}
2482 
2483 				if (!hive->reset_domain ||
2484 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2485 					r = -ENOENT;
2486 					amdgpu_put_xgmi_hive(hive);
2487 					goto init_failed;
2488 				}
2489 
2490 				/* Drop the early temporary reset domain we created for device */
2491 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2492 				adev->reset_domain = hive->reset_domain;
2493 				amdgpu_put_xgmi_hive(hive);
2494 			}
2495 		}
2496 	}
2497 
2498 	r = amdgpu_device_init_schedulers(adev);
2499 	if (r)
2500 		goto init_failed;
2501 
2502 	/* Don't init kfd if whole hive needs to be reset during init */
2503 	if (!adev->gmc.xgmi.pending_reset)
2504 		amdgpu_amdkfd_device_init(adev);
2505 
2506 	amdgpu_fru_get_product_info(adev);
2507 
2508 init_failed:
2509 	if (amdgpu_sriov_vf(adev))
2510 		amdgpu_virt_release_full_gpu(adev, true);
2511 
2512 	return r;
2513 }
2514 
2515 /**
2516  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2517  *
2518  * @adev: amdgpu_device pointer
2519  *
2520  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2521  * this function before a GPU reset.  If the value is retained after a
2522  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2523  */
2524 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2525 {
2526 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2527 }
2528 
2529 /**
2530  * amdgpu_device_check_vram_lost - check if vram is valid
2531  *
2532  * @adev: amdgpu_device pointer
2533  *
2534  * Checks the reset magic value written to the gart pointer in VRAM.
2535  * The driver calls this after a GPU reset to see if the contents of
2536  * VRAM have been lost or not.
2537  * Returns true if vram is lost, false if not.
2538  */
2539 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2540 {
2541 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2542 			AMDGPU_RESET_MAGIC_NUM))
2543 		return true;
2544 
2545 	if (!amdgpu_in_reset(adev))
2546 		return false;
2547 
2548 	/*
2549 	 * For all ASICs with baco/mode1 reset, the VRAM is
2550 	 * always assumed to be lost.
2551 	 */
2552 	switch (amdgpu_asic_reset_method(adev)) {
2553 	case AMD_RESET_METHOD_BACO:
2554 	case AMD_RESET_METHOD_MODE1:
2555 		return true;
2556 	default:
2557 		return false;
2558 	}
2559 }
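
/*
 * Illustrative sketch of how the two helpers above pair up (simplified from
 * the actual late-init and reset-recovery paths elsewhere in this file):
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... ASIC reset happens here ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		dev_warn(adev->dev, "VRAM contents lost, buffers must be restored\n");
 */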
2560 
2561 /**
2562  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2563  *
2564  * @adev: amdgpu_device pointer
2565  * @state: clockgating state (gate or ungate)
2566  *
2567  * The list of all the hardware IPs that make up the asic is walked and the
2568  * set_clockgating_state callbacks are run.
2569  * The late initialization pass enables clockgating for hardware IPs;
2570  * the fini and suspend passes disable clockgating for hardware IPs.
2571  * Returns 0 on success, negative error code on failure.
2572  */
2573 
2574 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2575 			       enum amd_clockgating_state state)
2576 {
2577 	int i, j, r;
2578 
2579 	if (amdgpu_emu_mode == 1)
2580 		return 0;
2581 
2582 	for (j = 0; j < adev->num_ip_blocks; j++) {
2583 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2584 		if (!adev->ip_blocks[i].status.late_initialized)
2585 			continue;
2586 		/* skip CG for GFX, SDMA on S0ix */
2587 		if (adev->in_s0ix &&
2588 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2589 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2590 			continue;
2591 		/* skip CG for VCE/UVD, it's handled specially */
2592 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2593 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2594 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2595 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2596 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2597 			/* enable clockgating to save power */
2598 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2599 										     state);
2600 			if (r) {
2601 				DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2602 					  adev->ip_blocks[i].version->funcs->name, r);
2603 				return r;
2604 			}
2605 		}
2606 	}
2607 
2608 	return 0;
2609 }
2610 
2611 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2612 			       enum amd_powergating_state state)
2613 {
2614 	int i, j, r;
2615 
2616 	if (amdgpu_emu_mode == 1)
2617 		return 0;
2618 
2619 	for (j = 0; j < adev->num_ip_blocks; j++) {
2620 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2621 		if (!adev->ip_blocks[i].status.late_initialized)
2622 			continue;
2623 		/* skip PG for GFX, SDMA on S0ix */
2624 		if (adev->in_s0ix &&
2625 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2626 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2627 			continue;
2628 		/* skip PG for VCE/UVD, it's handled specially */
2629 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2630 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2631 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2632 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2633 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2634 			/* enable powergating to save power */
2635 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2636 											state);
2637 			if (r) {
2638 				DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2639 					  adev->ip_blocks[i].version->funcs->name, r);
2640 				return r;
2641 			}
2642 		}
2643 	}
2644 	return 0;
2645 }
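
/*
 * Illustrative pairing (a sketch; the real call sites are in the late init
 * and fini/suspend paths in this file): clock- and powergating are gated once
 * late init is done and ungated again, in reverse order, before teardown.
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */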
2646 
2647 static int amdgpu_device_enable_mgpu_fan_boost(void)
2648 {
2649 	struct amdgpu_gpu_instance *gpu_ins;
2650 	struct amdgpu_device *adev;
2651 	int i, ret = 0;
2652 
2653 	mutex_lock(&mgpu_info.mutex);
2654 
2655 	/*
2656 	 * MGPU fan boost feature should be enabled
2657 	 * only when there are two or more dGPUs in
2658 	 * the system
2659 	 */
2660 	if (mgpu_info.num_dgpu < 2)
2661 		goto out;
2662 
2663 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2664 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2665 		adev = gpu_ins->adev;
2666 		if (!(adev->flags & AMD_IS_APU) &&
2667 		    !gpu_ins->mgpu_fan_enabled) {
2668 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2669 			if (ret)
2670 				break;
2671 
2672 			gpu_ins->mgpu_fan_enabled = 1;
2673 		}
2674 	}
2675 
2676 out:
2677 	mutex_unlock(&mgpu_info.mutex);
2678 
2679 	return ret;
2680 }
2681 
2682 /**
2683  * amdgpu_device_ip_late_init - run late init for hardware IPs
2684  *
2685  * @adev: amdgpu_device pointer
2686  *
2687  * Late initialization pass for hardware IPs.  The list of all the hardware
2688  * IPs that make up the asic is walked and the late_init callbacks are run.
2689  * late_init covers any special initialization that an IP requires
2690  * after all of them have been initialized or something that needs to happen
2691  * late in the init process.
2692  * Returns 0 on success, negative error code on failure.
2693  */
2694 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2695 {
2696 	struct amdgpu_gpu_instance *gpu_instance;
2697 	int i = 0, r;
2698 
2699 	for (i = 0; i < adev->num_ip_blocks; i++) {
2700 		if (!adev->ip_blocks[i].status.hw)
2701 			continue;
2702 		if (adev->ip_blocks[i].version->funcs->late_init) {
2703 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2704 			if (r) {
2705 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2706 					  adev->ip_blocks[i].version->funcs->name, r);
2707 				return r;
2708 			}
2709 		}
2710 		adev->ip_blocks[i].status.late_initialized = true;
2711 	}
2712 
2713 	r = amdgpu_ras_late_init(adev);
2714 	if (r) {
2715 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2716 		return r;
2717 	}
2718 
2719 	amdgpu_ras_set_error_query_ready(adev, true);
2720 
2721 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2722 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2723 
2724 	amdgpu_device_fill_reset_magic(adev);
2725 
2726 	r = amdgpu_device_enable_mgpu_fan_boost();
2727 	if (r)
2728 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2729 
2730 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2731 	if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2732 			       adev->asic_type == CHIP_ALDEBARAN))
2733 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2734 
2735 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2736 		mutex_lock(&mgpu_info.mutex);
2737 
2738 		/*
2739 		 * Reset device p-state to low as this was booted with high.
2740 		 *
2741 		 * This should be performed only after all devices from the same
2742 		 * hive get initialized.
2743 		 *
2744 		 * However, the number of devices in a hive is not known in
2745 		 * advance; it is counted one by one during device initialization.
2746 		 *
2747 		 * So, we wait until all XGMI interlinked devices are initialized.
2748 		 * This may bring some delays as those devices may come from
2749 		 * different hives.  But that should be OK.
2750 		 */
2751 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2752 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2753 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2754 				if (gpu_instance->adev->flags & AMD_IS_APU)
2755 					continue;
2756 
2757 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2758 						AMDGPU_XGMI_PSTATE_MIN);
2759 				if (r) {
2760 					DRM_ERROR("pstate setting failed (%d).\n", r);
2761 					break;
2762 				}
2763 			}
2764 		}
2765 
2766 		mutex_unlock(&mgpu_info.mutex);
2767 	}
2768 
2769 	return 0;
2770 }
2771 
2772 /**
2773  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2774  *
2775  * @adev: amdgpu_device pointer
2776  *
2777  * For ASICs that need to disable the SMC first
2778  */
2779 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2780 {
2781 	int i, r;
2782 
2783 	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2784 		return;
2785 
2786 	for (i = 0; i < adev->num_ip_blocks; i++) {
2787 		if (!adev->ip_blocks[i].status.hw)
2788 			continue;
2789 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2790 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2791 			/* XXX handle errors */
2792 			if (r) {
2793 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2794 					  adev->ip_blocks[i].version->funcs->name, r);
2795 			}
2796 			adev->ip_blocks[i].status.hw = false;
2797 			break;
2798 		}
2799 	}
2800 }
2801 
2802 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2803 {
2804 	int i, r;
2805 
2806 	for (i = 0; i < adev->num_ip_blocks; i++) {
2807 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2808 			continue;
2809 
2810 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2811 		if (r) {
2812 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2813 				  adev->ip_blocks[i].version->funcs->name, r);
2814 		}
2815 	}
2816 
2817 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2818 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2819 
2820 	amdgpu_amdkfd_suspend(adev, false);
2821 
2822 	/* Workaround for ASICs that need to disable the SMC first */
2823 	amdgpu_device_smu_fini_early(adev);
2824 
2825 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2826 		if (!adev->ip_blocks[i].status.hw)
2827 			continue;
2828 
2829 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2830 		/* XXX handle errors */
2831 		if (r) {
2832 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2833 				  adev->ip_blocks[i].version->funcs->name, r);
2834 		}
2835 
2836 		adev->ip_blocks[i].status.hw = false;
2837 	}
2838 
2839 	if (amdgpu_sriov_vf(adev)) {
2840 		if (amdgpu_virt_release_full_gpu(adev, false))
2841 			DRM_ERROR("failed to release exclusive mode on fini\n");
2842 	}
2843 
2844 	return 0;
2845 }
2846 
2847 /**
2848  * amdgpu_device_ip_fini - run fini for hardware IPs
2849  *
2850  * @adev: amdgpu_device pointer
2851  *
2852  * Main teardown pass for hardware IPs.  The list of all the hardware
2853  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2854  * are run.  hw_fini tears down the hardware associated with each IP
2855  * and sw_fini tears down any software state associated with each IP.
2856  * Returns 0 on success, negative error code on failure.
2857  */
2858 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2859 {
2860 	int i, r;
2861 
2862 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2863 		amdgpu_virt_release_ras_err_handler_data(adev);
2864 
2865 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2866 		amdgpu_xgmi_remove_device(adev);
2867 
2868 	amdgpu_amdkfd_device_fini_sw(adev);
2869 
2870 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2871 		if (!adev->ip_blocks[i].status.sw)
2872 			continue;
2873 
2874 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2875 			amdgpu_ucode_free_bo(adev);
2876 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2877 			amdgpu_device_wb_fini(adev);
2878 			amdgpu_device_mem_scratch_fini(adev);
2879 			amdgpu_ib_pool_fini(adev);
2880 		}
2881 
2882 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2883 		/* XXX handle errors */
2884 		if (r) {
2885 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2886 				  adev->ip_blocks[i].version->funcs->name, r);
2887 		}
2888 		adev->ip_blocks[i].status.sw = false;
2889 		adev->ip_blocks[i].status.valid = false;
2890 	}
2891 
2892 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2893 		if (!adev->ip_blocks[i].status.late_initialized)
2894 			continue;
2895 		if (adev->ip_blocks[i].version->funcs->late_fini)
2896 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2897 		adev->ip_blocks[i].status.late_initialized = false;
2898 	}
2899 
2900 	amdgpu_ras_fini(adev);
2901 
2902 	return 0;
2903 }
2904 
2905 /**
2906  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2907  *
2908  * @work: work_struct.
2909  */
2910 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2911 {
2912 	struct amdgpu_device *adev =
2913 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2914 	int r;
2915 
2916 	r = amdgpu_ib_ring_tests(adev);
2917 	if (r)
2918 		DRM_ERROR("ib ring test failed (%d).\n", r);
2919 }
2920 
2921 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2922 {
2923 	struct amdgpu_device *adev =
2924 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2925 
2926 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2927 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2928 
2929 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2930 		adev->gfx.gfx_off_state = true;
2931 }
2932 
2933 /**
2934  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2935  *
2936  * @adev: amdgpu_device pointer
2937  *
2938  * Main suspend function for hardware IPs.  The list of all the hardware
2939  * IPs that make up the asic is walked, clockgating is disabled and the
2940  * suspend callbacks are run.  suspend puts the hardware and software state
2941  * in each IP into a state suitable for suspend.
2942  * Returns 0 on success, negative error code on failure.
2943  */
2944 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2945 {
2946 	int i, r;
2947 
2948 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2949 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2950 
2951 	/*
2952 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2953 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2954 	 * scenario. Add the missing df cstate disablement here.
2955 	 */
2956 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2957 		dev_warn(adev->dev, "Failed to disallow df cstate");
2958 
2959 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2960 		if (!adev->ip_blocks[i].status.valid)
2961 			continue;
2962 
2963 		/* displays are handled separately */
2964 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2965 			continue;
2966 
2967 		/* XXX handle errors */
2968 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2969 		/* XXX handle errors */
2970 		if (r) {
2971 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2972 				  adev->ip_blocks[i].version->funcs->name, r);
2973 			return r;
2974 		}
2975 
2976 		adev->ip_blocks[i].status.hw = false;
2977 	}
2978 
2979 	return 0;
2980 }
2981 
2982 /**
2983  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2984  *
2985  * @adev: amdgpu_device pointer
2986  *
2987  * Main suspend function for hardware IPs.  The list of all the hardware
2988  * IPs that make up the asic is walked, clockgating is disabled and the
2989  * suspend callbacks are run.  suspend puts the hardware and software state
2990  * in each IP into a state suitable for suspend.
2991  * Returns 0 on success, negative error code on failure.
2992  */
2993 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2994 {
2995 	int i, r;
2996 
2997 	if (adev->in_s0ix)
2998 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
2999 
3000 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3001 		if (!adev->ip_blocks[i].status.valid)
3002 			continue;
3003 		/* displays are handled in phase1 */
3004 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3005 			continue;
3006 		/* PSP lost connection when err_event_athub occurs */
3007 		if (amdgpu_ras_intr_triggered() &&
3008 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3009 			adev->ip_blocks[i].status.hw = false;
3010 			continue;
3011 		}
3012 
3013 		/* skip unnecessary suspend if we have not initialized them yet */
3014 		if (adev->gmc.xgmi.pending_reset &&
3015 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3016 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3017 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3018 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3019 			adev->ip_blocks[i].status.hw = false;
3020 			continue;
3021 		}
3022 
3023 		/* skip suspend of gfx/mes and psp for S0ix
3024 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3025 		 * like at runtime. PSP is also part of the always on hardware
3026 		 * so no need to suspend it.
3027 		 */
3028 		if (adev->in_s0ix &&
3029 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3030 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3031 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3032 			continue;
3033 
3034 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3035 		if (adev->in_s0ix &&
3036 		    (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3037 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3038 			continue;
3039 
3040 		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3041 		 * These are in TMR and are expected to be reused by PSP-TOS to reload
3042 		 * from that location; RLC autoload also gets loaded from there based
3043 		 * on the PMFW -> PSP message during the re-init sequence.  Therefore,
3044 		 * psp suspend & resume should be skipped to avoid destroying the TMR
3045 		 * and reloading the FWs again for IMU enabled APU ASICs.
3046 		 */
3047 		if (amdgpu_in_reset(adev) &&
3048 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3049 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3050 			continue;
3051 
3052 		/* XXX handle errors */
3053 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3054 		/* XXX handle errors */
3055 		if (r) {
3056 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3057 				  adev->ip_blocks[i].version->funcs->name, r);
3058 		}
3059 		adev->ip_blocks[i].status.hw = false;
3060 		/* handle putting the SMC in the appropriate state */
3061 		if (!amdgpu_sriov_vf(adev)) {
3062 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3063 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3064 				if (r) {
3065 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3066 							adev->mp1_state, r);
3067 					return r;
3068 				}
3069 			}
3070 		}
3071 	}
3072 
3073 	return 0;
3074 }
3075 
3076 /**
3077  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3078  *
3079  * @adev: amdgpu_device pointer
3080  *
3081  * Main suspend function for hardware IPs.  The list of all the hardware
3082  * IPs that make up the asic is walked, clockgating is disabled and the
3083  * suspend callbacks are run.  suspend puts the hardware and software state
3084  * in each IP into a state suitable for suspend.
3085  * Returns 0 on success, negative error code on failure.
3086  */
3087 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3088 {
3089 	int r;
3090 
3091 	if (amdgpu_sriov_vf(adev)) {
3092 		amdgpu_virt_fini_data_exchange(adev);
3093 		amdgpu_virt_request_full_gpu(adev, false);
3094 	}
3095 
3096 	r = amdgpu_device_ip_suspend_phase1(adev);
3097 	if (r)
3098 		return r;
3099 	r = amdgpu_device_ip_suspend_phase2(adev);
3100 
3101 	if (amdgpu_sriov_vf(adev))
3102 		amdgpu_virt_release_full_gpu(adev, false);
3103 
3104 	return r;
3105 }
3106 
3107 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3108 {
3109 	int i, r;
3110 
3111 	static enum amd_ip_block_type ip_order[] = {
3112 		AMD_IP_BLOCK_TYPE_COMMON,
3113 		AMD_IP_BLOCK_TYPE_GMC,
3114 		AMD_IP_BLOCK_TYPE_PSP,
3115 		AMD_IP_BLOCK_TYPE_IH,
3116 	};
3117 
3118 	for (i = 0; i < adev->num_ip_blocks; i++) {
3119 		int j;
3120 		struct amdgpu_ip_block *block;
3121 
3122 		block = &adev->ip_blocks[i];
3123 		block->status.hw = false;
3124 
3125 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3126 
3127 			if (block->version->type != ip_order[j] ||
3128 				!block->status.valid)
3129 				continue;
3130 
3131 			r = block->version->funcs->hw_init(adev);
3132 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3133 			if (r)
3134 				return r;
3135 			block->status.hw = true;
3136 		}
3137 	}
3138 
3139 	return 0;
3140 }
3141 
3142 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3143 {
3144 	int i, r;
3145 
3146 	static enum amd_ip_block_type ip_order[] = {
3147 		AMD_IP_BLOCK_TYPE_SMC,
3148 		AMD_IP_BLOCK_TYPE_DCE,
3149 		AMD_IP_BLOCK_TYPE_GFX,
3150 		AMD_IP_BLOCK_TYPE_SDMA,
3151 		AMD_IP_BLOCK_TYPE_UVD,
3152 		AMD_IP_BLOCK_TYPE_VCE,
3153 		AMD_IP_BLOCK_TYPE_VCN
3154 	};
3155 
3156 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3157 		int j;
3158 		struct amdgpu_ip_block *block;
3159 
3160 		for (j = 0; j < adev->num_ip_blocks; j++) {
3161 			block = &adev->ip_blocks[j];
3162 
3163 			if (block->version->type != ip_order[i] ||
3164 				!block->status.valid ||
3165 				block->status.hw)
3166 				continue;
3167 
3168 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3169 				r = block->version->funcs->resume(adev);
3170 			else
3171 				r = block->version->funcs->hw_init(adev);
3172 
3173 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3174 			if (r)
3175 				return r;
3176 			block->status.hw = true;
3177 		}
3178 	}
3179 
3180 	return 0;
3181 }
3182 
3183 /**
3184  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3185  *
3186  * @adev: amdgpu_device pointer
3187  *
3188  * First resume function for hardware IPs.  The list of all the hardware
3189  * IPs that make up the asic is walked and the resume callbacks are run for
3190  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3191  * after a suspend and updates the software state as necessary.  This
3192  * function is also used for restoring the GPU after a GPU reset.
3193  * Returns 0 on success, negative error code on failure.
3194  */
3195 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3196 {
3197 	int i, r;
3198 
3199 	for (i = 0; i < adev->num_ip_blocks; i++) {
3200 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3201 			continue;
3202 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3203 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3204 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3205 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3206 
3207 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3208 			if (r) {
3209 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3210 					  adev->ip_blocks[i].version->funcs->name, r);
3211 				return r;
3212 			}
3213 			adev->ip_blocks[i].status.hw = true;
3214 		}
3215 	}
3216 
3217 	return 0;
3218 }
3219 
3220 /**
3221  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3222  *
3223  * @adev: amdgpu_device pointer
3224  *
3225  * Second resume function for hardware IPs.  The list of all the hardware
3226  * IPs that make up the asic is walked and the resume callbacks are run for
3227  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3228  * functional state after a suspend and updates the software state as
3229  * necessary.  This function is also used for restoring the GPU after a GPU
3230  * reset.
3231  * Returns 0 on success, negative error code on failure.
3232  */
3233 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3234 {
3235 	int i, r;
3236 
3237 	for (i = 0; i < adev->num_ip_blocks; i++) {
3238 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3239 			continue;
3240 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3241 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3242 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3243 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3244 			continue;
3245 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3246 		if (r) {
3247 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3248 				  adev->ip_blocks[i].version->funcs->name, r);
3249 			return r;
3250 		}
3251 		adev->ip_blocks[i].status.hw = true;
3252 	}
3253 
3254 	return 0;
3255 }
3256 
3257 /**
3258  * amdgpu_device_ip_resume - run resume for hardware IPs
3259  *
3260  * @adev: amdgpu_device pointer
3261  *
3262  * Main resume function for hardware IPs.  The hardware IPs
3263  * are split into two resume functions because they are
3264  * also used in recovering from a GPU reset and some additional
3265  * steps need to be taken between them.  In this case (S3/S4) they are
3266  * run sequentially.
3267  * Returns 0 on success, negative error code on failure.
3268  */
3269 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3270 {
3271 	int r;
3272 
3273 	r = amdgpu_amdkfd_resume_iommu(adev);
3274 	if (r)
3275 		return r;
3276 
3277 	r = amdgpu_device_ip_resume_phase1(adev);
3278 	if (r)
3279 		return r;
3280 
3281 	r = amdgpu_device_fw_loading(adev);
3282 	if (r)
3283 		return r;
3284 
3285 	r = amdgpu_device_ip_resume_phase2(adev);
3286 
3287 	return r;
3288 }
3289 
3290 /**
3291  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3292  *
3293  * @adev: amdgpu_device pointer
3294  *
3295  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3296  */
3297 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3298 {
3299 	if (amdgpu_sriov_vf(adev)) {
3300 		if (adev->is_atom_fw) {
3301 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3302 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3303 		} else {
3304 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3305 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3306 		}
3307 
3308 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3309 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3310 	}
3311 }
3312 
3313 /**
3314  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3315  *
3316  * @asic_type: AMD asic type
3317  *
3318  * Check if there is DC (new modesetting infrastructure) support for an asic.
3319  * Returns true if DC has support, false if not.
3320  */
3321 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3322 {
3323 	switch (asic_type) {
3324 #ifdef CONFIG_DRM_AMDGPU_SI
3325 	case CHIP_HAINAN:
3326 #endif
3327 	case CHIP_TOPAZ:
3328 		/* chips with no display hardware */
3329 		return false;
3330 #if defined(CONFIG_DRM_AMD_DC)
3331 	case CHIP_TAHITI:
3332 	case CHIP_PITCAIRN:
3333 	case CHIP_VERDE:
3334 	case CHIP_OLAND:
3335 		/*
3336 		 * We have systems in the wild with these ASICs that require
3337 		 * LVDS and VGA support which is not supported with DC.
3338 		 *
3339 		 * Fallback to the non-DC driver here by default so as not to
3340 		 * cause regressions.
3341 		 */
3342 #if defined(CONFIG_DRM_AMD_DC_SI)
3343 		return amdgpu_dc > 0;
3344 #else
3345 		return false;
3346 #endif
3347 	case CHIP_BONAIRE:
3348 	case CHIP_KAVERI:
3349 	case CHIP_KABINI:
3350 	case CHIP_MULLINS:
3351 		/*
3352 		 * We have systems in the wild with these ASICs that require
3353 		 * VGA support which is not supported with DC.
3354 		 *
3355 		 * Fallback to the non-DC driver here by default so as not to
3356 		 * cause regressions.
3357 		 */
3358 		return amdgpu_dc > 0;
3359 	default:
3360 		return amdgpu_dc != 0;
3361 #else
3362 	default:
3363 		if (amdgpu_dc > 0)
3364 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3365 					 "but isn't supported by ASIC, ignoring\n");
3366 		return false;
3367 #endif
3368 	}
3369 }
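
/*
 * Illustrative example: on the SI/CIK parts above, DC stays off unless the
 * user opts in through the dc module parameter, e.g. booting with
 *
 *	amdgpu.dc=1
 *
 * makes amdgpu_dc > 0 and selects the DC path where the kernel configuration
 * allows it.
 */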
3370 
3371 /**
3372  * amdgpu_device_has_dc_support - check if dc is supported
3373  *
3374  * @adev: amdgpu_device pointer
3375  *
3376  * Returns true for supported, false for not supported
3377  */
3378 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3379 {
3380 	if (adev->enable_virtual_display ||
3381 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3382 		return false;
3383 
3384 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3385 }
3386 
3387 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3388 {
3389 	struct amdgpu_device *adev =
3390 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3391 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3392 
3393 	/* It's a bug to not have a hive within this function */
3394 	if (WARN_ON(!hive))
3395 		return;
3396 
3397 	/*
3398 	 * Use task barrier to synchronize all xgmi reset works across the
3399 	 * hive. task_barrier_enter and task_barrier_exit will block
3400 	 * until all the threads running the xgmi reset works reach
3401 	 * those points. task_barrier_full will do both blocks.
3402 	 */
3403 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3404 
3405 		task_barrier_enter(&hive->tb);
3406 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3407 
3408 		if (adev->asic_reset_res)
3409 			goto fail;
3410 
3411 		task_barrier_exit(&hive->tb);
3412 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3413 
3414 		if (adev->asic_reset_res)
3415 			goto fail;
3416 
3417 		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3418 		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3419 			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3420 	} else {
3421 
3422 		task_barrier_full(&hive->tb);
3423 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3424 	}
3425 
3426 fail:
3427 	if (adev->asic_reset_res)
3428 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3429 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3430 	amdgpu_put_xgmi_hive(hive);
3431 }
3432 
3433 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3434 {
3435 	char *input = amdgpu_lockup_timeout;
3436 	char *timeout_setting = NULL;
3437 	int index = 0;
3438 	long timeout;
3439 	int ret = 0;
3440 
3441 	/*
3442 	 * By default the timeout for non-compute jobs is 10000
3443 	 * and 60000 for compute jobs.
3444 	 * In SR-IOV or passthrough mode, the timeout for compute
3445 	 * jobs is 60000 by default.
3446 	 */
3447 	adev->gfx_timeout = msecs_to_jiffies(10000);
3448 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3449 	if (amdgpu_sriov_vf(adev))
3450 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3451 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3452 	else
3453 		adev->compute_timeout =  msecs_to_jiffies(60000);
3454 
3455 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3456 		while ((timeout_setting = strsep(&input, ",")) &&
3457 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3458 			ret = kstrtol(timeout_setting, 0, &timeout);
3459 			if (ret)
3460 				return ret;
3461 
3462 			if (timeout == 0) {
3463 				index++;
3464 				continue;
3465 			} else if (timeout < 0) {
3466 				timeout = MAX_SCHEDULE_TIMEOUT;
3467 				dev_warn(adev->dev, "lockup timeout disabled");
3468 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3469 			} else {
3470 				timeout = msecs_to_jiffies(timeout);
3471 			}
3472 
3473 			switch (index++) {
3474 			case 0:
3475 				adev->gfx_timeout = timeout;
3476 				break;
3477 			case 1:
3478 				adev->compute_timeout = timeout;
3479 				break;
3480 			case 2:
3481 				adev->sdma_timeout = timeout;
3482 				break;
3483 			case 3:
3484 				adev->video_timeout = timeout;
3485 				break;
3486 			default:
3487 				break;
3488 			}
3489 		}
3490 		/*
3491 		 * There is only one value specified and
3492 		 * it should apply to all non-compute jobs.
3493 		 */
3494 		if (index == 1) {
3495 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3496 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3497 				adev->compute_timeout = adev->gfx_timeout;
3498 		}
3499 	}
3500 
3501 	return ret;
3502 }
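
/*
 * Illustrative example of the lockup_timeout string parsed above, in the
 * order gfx,compute,sdma,video (values in ms; 0 keeps the default and a
 * negative value disables the timeout):
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * A single value, e.g. amdgpu.lockup_timeout=5000, applies to all
 * non-compute queues (and to compute as well under SR-IOV or passthrough).
 */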
3503 
3504 /**
3505  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3506  *
3507  * @adev: amdgpu_device pointer
3508  *
3509  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3510  */
3511 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3512 {
3513 	struct iommu_domain *domain;
3514 
3515 	domain = iommu_get_domain_for_dev(adev->dev);
3516 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3517 		adev->ram_is_direct_mapped = true;
3518 }
3519 
3520 static const struct attribute *amdgpu_dev_attributes[] = {
3521 	&dev_attr_product_name.attr,
3522 	&dev_attr_product_number.attr,
3523 	&dev_attr_serial_number.attr,
3524 	&dev_attr_pcie_replay_count.attr,
3525 	NULL
3526 };
3527 
3528 /**
3529  * amdgpu_device_init - initialize the driver
3530  *
3531  * @adev: amdgpu_device pointer
3532  * @flags: driver flags
3533  *
3534  * Initializes the driver info and hw (all asics).
3535  * Returns 0 for success or an error on failure.
3536  * Called at driver startup.
3537  */
3538 int amdgpu_device_init(struct amdgpu_device *adev,
3539 		       uint32_t flags)
3540 {
3541 	struct drm_device *ddev = adev_to_drm(adev);
3542 	struct pci_dev *pdev = adev->pdev;
3543 	int r, i;
3544 	bool px = false;
3545 	u32 max_MBps;
3546 
3547 	adev->shutdown = false;
3548 	adev->flags = flags;
3549 
3550 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3551 		adev->asic_type = amdgpu_force_asic_type;
3552 	else
3553 		adev->asic_type = flags & AMD_ASIC_MASK;
3554 
3555 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3556 	if (amdgpu_emu_mode == 1)
3557 		adev->usec_timeout *= 10;
3558 	adev->gmc.gart_size = 512 * 1024 * 1024;
3559 	adev->accel_working = false;
3560 	adev->num_rings = 0;
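	/* the gang submit pointer starts out as an already signalled stub fence */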
3561 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3562 	adev->mman.buffer_funcs = NULL;
3563 	adev->mman.buffer_funcs_ring = NULL;
3564 	adev->vm_manager.vm_pte_funcs = NULL;
3565 	adev->vm_manager.vm_pte_num_scheds = 0;
3566 	adev->gmc.gmc_funcs = NULL;
3567 	adev->harvest_ip_mask = 0x0;
3568 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3569 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3570 
3571 	adev->smc_rreg = &amdgpu_invalid_rreg;
3572 	adev->smc_wreg = &amdgpu_invalid_wreg;
3573 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3574 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3575 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3576 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3577 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3578 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3579 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3580 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3581 	adev->didt_rreg = &amdgpu_invalid_rreg;
3582 	adev->didt_wreg = &amdgpu_invalid_wreg;
3583 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3584 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3585 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3586 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3587 
3588 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3589 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3590 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3591 
	/* mutex initialization is all done here so we
	 * can call these functions again without locking issues */
3594 	mutex_init(&adev->firmware.mutex);
3595 	mutex_init(&adev->pm.mutex);
3596 	mutex_init(&adev->gfx.gpu_clock_mutex);
3597 	mutex_init(&adev->srbm_mutex);
3598 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3599 	mutex_init(&adev->gfx.gfx_off_mutex);
3600 	mutex_init(&adev->grbm_idx_mutex);
3601 	mutex_init(&adev->mn_lock);
3602 	mutex_init(&adev->virt.vf_errors.lock);
3603 	hash_init(adev->mn_hash);
3604 	mutex_init(&adev->psp.mutex);
3605 	mutex_init(&adev->notifier_lock);
3606 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3607 	mutex_init(&adev->benchmark_mutex);
3608 
3609 	amdgpu_device_init_apu_flags(adev);
3610 
3611 	r = amdgpu_device_check_arguments(adev);
3612 	if (r)
3613 		return r;
3614 
3615 	spin_lock_init(&adev->mmio_idx_lock);
3616 	spin_lock_init(&adev->smc_idx_lock);
3617 	spin_lock_init(&adev->pcie_idx_lock);
3618 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3619 	spin_lock_init(&adev->didt_idx_lock);
3620 	spin_lock_init(&adev->gc_cac_idx_lock);
3621 	spin_lock_init(&adev->se_cac_idx_lock);
3622 	spin_lock_init(&adev->audio_endpt_idx_lock);
3623 	spin_lock_init(&adev->mm_stats.lock);
3624 
3625 	INIT_LIST_HEAD(&adev->shadow_list);
3626 	mutex_init(&adev->shadow_list_lock);
3627 
3628 	INIT_LIST_HEAD(&adev->reset_list);
3629 
3630 	INIT_LIST_HEAD(&adev->ras_list);
3631 
3632 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3633 			  amdgpu_device_delayed_init_work_handler);
3634 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3635 			  amdgpu_device_delay_enable_gfx_off);
3636 
3637 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3638 
3639 	adev->gfx.gfx_off_req_count = 1;
3640 	adev->gfx.gfx_off_residency = 0;
3641 	adev->gfx.gfx_off_entrycount = 0;
3642 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3643 
3644 	atomic_set(&adev->throttling_logging_enabled, 1);
3645 	/*
3646 	 * If throttling continues, logging will be performed every minute
3647 	 * to avoid log flooding. "-1" is subtracted since the thermal
3648 	 * throttling interrupt comes every second. Thus, the total logging
	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3650 	 * for throttling interrupt) = 60 seconds.
3651 	 */
3652 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3653 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3654 
3655 	/* Registers mapping */
3656 	/* TODO: block userspace mapping of io register */
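	/* the MMIO register aperture is BAR 5 on BONAIRE and newer, BAR 2 on older parts */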
3657 	if (adev->asic_type >= CHIP_BONAIRE) {
3658 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3659 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3660 	} else {
3661 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3662 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3663 	}
3664 
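	/* every IP block starts out with an unknown power state */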
3665 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3666 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3667 
3668 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
	if (!adev->rmmio)
		return -ENOMEM;
3672 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3673 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3674 
3675 	amdgpu_device_get_pcie_info(adev);
3676 
3677 	if (amdgpu_mcbp)
3678 		DRM_INFO("MCBP is enabled\n");
3679 
3680 	/*
	 * The reset domain needs to be present early, before the XGMI hive is
	 * discovered (if any) and initialized, so that the reset semaphore and
	 * in_gpu_reset flag can be used during init and before any RREG32 call.
3684 	 */
3685 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3686 	if (!adev->reset_domain)
3687 		return -ENOMEM;
3688 
3689 	/* detect hw virtualization here */
3690 	amdgpu_detect_virtualization(adev);
3691 
3692 	r = amdgpu_device_get_job_timeout_settings(adev);
3693 	if (r) {
3694 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3695 		return r;
3696 	}
3697 
3698 	/* early init functions */
3699 	r = amdgpu_device_ip_early_init(adev);
3700 	if (r)
3701 		return r;
3702 
3703 	/* Get rid of things like offb */
3704 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3705 	if (r)
3706 		return r;
3707 
3708 	/* Enable TMZ based on IP_VERSION */
3709 	amdgpu_gmc_tmz_set(adev);
3710 
3711 	amdgpu_gmc_noretry_set(adev);
	/* Need to get xgmi info early to decide the reset behavior */
3713 	if (adev->gmc.xgmi.supported) {
3714 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
3715 		if (r)
3716 			return r;
3717 	}
3718 
3719 	/* enable PCIE atomic ops */
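	/*
	 * VFs read the supported atomic ops from the PF2VF info page,
	 * bare metal asks the PCIe root port directly.
	 */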
3720 	if (amdgpu_sriov_vf(adev))
3721 		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3722 			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3723 			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3724 	else
3725 		adev->have_atomics_support =
3726 			!pci_enable_atomic_ops_to_root(adev->pdev,
3727 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3728 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3729 	if (!adev->have_atomics_support)
		dev_info(adev->dev, "PCIE atomic ops are not supported\n");
3731 
	/* doorbell bar mapping and doorbell index init */
3733 	amdgpu_device_doorbell_init(adev);
3734 
3735 	if (amdgpu_emu_mode == 1) {
3736 		/* post the asic on emulation mode */
3737 		emu_soc_asic_init(adev);
3738 		goto fence_driver_init;
3739 	}
3740 
3741 	amdgpu_reset_init(adev);
3742 
3743 	/* detect if we are with an SRIOV vbios */
3744 	amdgpu_device_detect_sriov_bios(adev);
3745 
3746 	/* check if we need to reset the asic
3747 	 *  E.g., driver was not cleanly unloaded previously, etc.
3748 	 */
3749 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3750 		if (adev->gmc.xgmi.num_physical_nodes) {
3751 			dev_info(adev->dev, "Pending hive reset.\n");
3752 			adev->gmc.xgmi.pending_reset = true;
3753 			/* Only need to init necessary block for SMU to handle the reset */
3754 			for (i = 0; i < adev->num_ip_blocks; i++) {
3755 				if (!adev->ip_blocks[i].status.valid)
3756 					continue;
3757 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3758 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3759 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3760 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3761 					DRM_DEBUG("IP %s disabled for hw_init.\n",
3762 						adev->ip_blocks[i].version->funcs->name);
3763 					adev->ip_blocks[i].status.hw = true;
3764 				}
3765 			}
3766 		} else {
3767 			r = amdgpu_asic_reset(adev);
3768 			if (r) {
3769 				dev_err(adev->dev, "asic reset on init failed\n");
3770 				goto failed;
3771 			}
3772 		}
3773 	}
3774 
3775 	pci_enable_pcie_error_reporting(adev->pdev);
3776 
3777 	/* Post card if necessary */
3778 	if (amdgpu_device_need_post(adev)) {
3779 		if (!adev->bios) {
3780 			dev_err(adev->dev, "no vBIOS found\n");
3781 			r = -EINVAL;
3782 			goto failed;
3783 		}
3784 		DRM_INFO("GPU posting now...\n");
3785 		r = amdgpu_device_asic_init(adev);
3786 		if (r) {
3787 			dev_err(adev->dev, "gpu post error!\n");
3788 			goto failed;
3789 		}
3790 	}
3791 
3792 	if (adev->is_atom_fw) {
3793 		/* Initialize clocks */
3794 		r = amdgpu_atomfirmware_get_clock_info(adev);
3795 		if (r) {
3796 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3797 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3798 			goto failed;
3799 		}
3800 	} else {
3801 		/* Initialize clocks */
3802 		r = amdgpu_atombios_get_clock_info(adev);
3803 		if (r) {
3804 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3805 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3806 			goto failed;
3807 		}
3808 		/* init i2c buses */
3809 		if (!amdgpu_device_has_dc_support(adev))
3810 			amdgpu_atombios_i2c_init(adev);
3811 	}
3812 
3813 fence_driver_init:
3814 	/* Fence driver */
3815 	r = amdgpu_fence_driver_sw_init(adev);
3816 	if (r) {
3817 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3818 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3819 		goto failed;
3820 	}
3821 
3822 	/* init the mode config */
3823 	drm_mode_config_init(adev_to_drm(adev));
3824 
3825 	r = amdgpu_device_ip_init(adev);
3826 	if (r) {
3827 		/* failed in exclusive mode due to timeout */
3828 		if (amdgpu_sriov_vf(adev) &&
3829 		    !amdgpu_sriov_runtime(adev) &&
3830 		    amdgpu_virt_mmio_blocked(adev) &&
3831 		    !amdgpu_virt_wait_reset(adev)) {
3832 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3833 			/* Don't send request since VF is inactive. */
3834 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3835 			adev->virt.ops = NULL;
3836 			r = -EAGAIN;
3837 			goto release_ras_con;
3838 		}
3839 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3840 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3841 		goto release_ras_con;
3842 	}
3843 
3844 	amdgpu_fence_driver_hw_init(adev);
3845 
3846 	dev_info(adev->dev,
3847 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3848 			adev->gfx.config.max_shader_engines,
3849 			adev->gfx.config.max_sh_per_se,
3850 			adev->gfx.config.max_cu_per_sh,
3851 			adev->gfx.cu_info.number);
3852 
3853 	adev->accel_working = true;
3854 
3855 	amdgpu_vm_check_compute_bug(adev);
3856 
3857 	/* Initialize the buffer migration limit. */
3858 	if (amdgpu_moverate >= 0)
3859 		max_MBps = amdgpu_moverate;
3860 	else
3861 		max_MBps = 8; /* Allow 8 MB/s. */
3862 	/* Get a log2 for easy divisions. */
3863 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3864 
3865 	r = amdgpu_pm_sysfs_init(adev);
3866 	if (r) {
3867 		adev->pm_sysfs_en = false;
3868 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
	} else {
		adev->pm_sysfs_en = true;
	}
3871 
3872 	r = amdgpu_ucode_sysfs_init(adev);
3873 	if (r) {
3874 		adev->ucode_sysfs_en = false;
3875 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
	} else {
		adev->ucode_sysfs_en = true;
	}
3878 
3879 	r = amdgpu_psp_sysfs_init(adev);
3880 	if (r) {
3881 		adev->psp_sysfs_en = false;
3882 		if (!amdgpu_sriov_vf(adev))
3883 			DRM_ERROR("Creating psp sysfs failed\n");
	} else {
		adev->psp_sysfs_en = true;
	}
3886 
3887 	/*
3888 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
	 * Otherwise the mgpu fan boost feature will be skipped because the
	 * gpu instance count comes up short.
3891 	 */
3892 	amdgpu_register_gpu_instance(adev);
3893 
3894 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3895 	 * explicit gating rather than handling it automatically.
3896 	 */
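	/*
	 * With an XGMI hive reset still pending, late init and RAS resume are
	 * skipped here; the delayed hive reset work queued further below
	 * performs the reset instead.
	 */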
3897 	if (!adev->gmc.xgmi.pending_reset) {
3898 		r = amdgpu_device_ip_late_init(adev);
3899 		if (r) {
3900 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3901 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3902 			goto release_ras_con;
3903 		}
3904 		/* must succeed. */
3905 		amdgpu_ras_resume(adev);
3906 		queue_delayed_work(system_wq, &adev->delayed_init_work,
3907 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3908 	}
3909 
3910 	if (amdgpu_sriov_vf(adev))
3911 		flush_delayed_work(&adev->delayed_init_work);
3912 
3913 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3914 	if (r)
3915 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3916 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3921 
	/* Keep the cached PCI config space at hand for restore after a sudden PCI error */
3923 	if (amdgpu_device_cache_pci_state(adev->pdev))
3924 		pci_restore_state(pdev);
3925 
3926 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3927 	/* this will fail for cards that aren't VGA class devices, just
3928 	 * ignore it */
3929 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3930 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3931 
3932 	if (amdgpu_device_supports_px(ddev)) {
3933 		px = true;
3934 		vga_switcheroo_register_client(adev->pdev,
3935 					       &amdgpu_switcheroo_ops, px);
3936 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3937 	}
3938 
3939 	if (adev->gmc.xgmi.pending_reset)
3940 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3941 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3942 
3943 	amdgpu_device_check_iommu_direct_map(adev);
3944 
3945 	return 0;
3946 
3947 release_ras_con:
3948 	amdgpu_release_ras_context(adev);
3949 
3950 failed:
3951 	amdgpu_vf_error_trans_all(adev);
3952 
3953 	return r;
3954 }
3955 
3956 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3957 {
3958 
3959 	/* Clear all CPU mappings pointing to this device */
3960 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3961 
3962 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
3963 	amdgpu_device_doorbell_fini(adev);
3964 
3965 	iounmap(adev->rmmio);
3966 	adev->rmmio = NULL;
3967 	if (adev->mman.aper_base_kaddr)
3968 		iounmap(adev->mman.aper_base_kaddr);
3969 	adev->mman.aper_base_kaddr = NULL;
3970 
3971 	/* Memory manager related */
3972 	if (!adev->gmc.xgmi.connected_to_cpu) {
3973 		arch_phys_wc_del(adev->gmc.vram_mtrr);
3974 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3975 	}
3976 }
3977 
3978 /**
3979  * amdgpu_device_fini_hw - tear down the driver
3980  *
3981  * @adev: amdgpu_device pointer
3982  *
3983  * Tear down the driver info (all asics).
3984  * Called at driver shutdown.
3985  */
3986 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3987 {
3988 	dev_info(adev->dev, "amdgpu: finishing device.\n");
3989 	flush_delayed_work(&adev->delayed_init_work);
3990 	adev->shutdown = true;
3991 
	/* make sure the IB test has finished before entering exclusive mode
	 * to avoid preemption on the IB test
	 */
3995 	if (amdgpu_sriov_vf(adev)) {
3996 		amdgpu_virt_request_full_gpu(adev, false);
3997 		amdgpu_virt_fini_data_exchange(adev);
3998 	}
3999 
4000 	/* disable all interrupts */
4001 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
4003 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4004 			drm_helper_force_disable_all(adev_to_drm(adev));
4005 		else
4006 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4007 	}
4008 	amdgpu_fence_driver_hw_fini(adev);
4009 
4010 	if (adev->mman.initialized) {
4011 		flush_delayed_work(&adev->mman.bdev.wq);
4012 		ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
4013 	}
4014 
4015 	if (adev->pm_sysfs_en)
4016 		amdgpu_pm_sysfs_fini(adev);
4017 	if (adev->ucode_sysfs_en)
4018 		amdgpu_ucode_sysfs_fini(adev);
4019 	if (adev->psp_sysfs_en)
4020 		amdgpu_psp_sysfs_fini(adev);
4021 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4022 
4023 	/* disable ras feature must before hw fini */
4024 	amdgpu_ras_pre_fini(adev);
4025 
4026 	amdgpu_device_ip_fini_early(adev);
4027 
4028 	amdgpu_irq_fini_hw(adev);
4029 
4030 	if (adev->mman.initialized)
4031 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4032 
4033 	amdgpu_gart_dummy_page_fini(adev);
4034 
4035 	amdgpu_device_unmap_mmio(adev);
}
4038 
4039 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4040 {
4041 	int idx;
4042 
4043 	amdgpu_fence_driver_sw_fini(adev);
4044 	amdgpu_device_ip_fini(adev);
4045 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4046 	adev->accel_working = false;
4047 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4048 
4049 	amdgpu_reset_fini(adev);
4050 
4051 	/* free i2c buses */
4052 	if (!amdgpu_device_has_dc_support(adev))
4053 		amdgpu_i2c_fini(adev);
4054 
4055 	if (amdgpu_emu_mode != 1)
4056 		amdgpu_atombios_fini(adev);
4057 
4058 	kfree(adev->bios);
4059 	adev->bios = NULL;
4060 	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
4061 		vga_switcheroo_unregister_client(adev->pdev);
4062 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4063 	}
4064 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4065 		vga_client_unregister(adev->pdev);
4066 
4067 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4068 
4069 		iounmap(adev->rmmio);
4070 		adev->rmmio = NULL;
4071 		amdgpu_device_doorbell_fini(adev);
4072 		drm_dev_exit(idx);
4073 	}
4074 
4075 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4076 		amdgpu_pmu_fini(adev);
4077 	if (adev->mman.discovery_bin)
4078 		amdgpu_discovery_fini(adev);
4079 
4080 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4081 	adev->reset_domain = NULL;
4082 
4083 	kfree(adev->pci_state);
}
4086 
4087 /**
4088  * amdgpu_device_evict_resources - evict device resources
4089  * @adev: amdgpu device object
4090  *
 * Evicts all TTM device resources (VRAM BOs, GART table) from the LRU list
4092  * of the vram memory type. Mainly used for evicting device resources
4093  * at suspend time.
4094  *
4095  */
4096 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4097 {
4098 	int ret;
4099 
4100 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4101 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4102 		return 0;
4103 
4104 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4105 	if (ret)
4106 		DRM_WARN("evicting device resources failed\n");
4107 	return ret;
4108 }
4109 
4110 /*
4111  * Suspend & resume.
4112  */
4113 /**
4114  * amdgpu_device_suspend - initiate device suspend
4115  *
4116  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
4118  *
4119  * Puts the hw in the suspend state (all asics).
4120  * Returns 0 for success or an error on failure.
4121  * Called at driver suspend.
4122  */
4123 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4124 {
4125 	struct amdgpu_device *adev = drm_to_adev(dev);
4126 	int r = 0;
4127 
4128 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4129 		return 0;
4130 
4131 	adev->in_suspend = true;
4132 
4133 	/* Evict the majority of BOs before grabbing the full access */
4134 	r = amdgpu_device_evict_resources(adev);
4135 	if (r)
4136 		return r;
4137 
4138 	if (amdgpu_sriov_vf(adev)) {
4139 		amdgpu_virt_fini_data_exchange(adev);
4140 		r = amdgpu_virt_request_full_gpu(adev, false);
4141 		if (r)
4142 			return r;
4143 	}
4144 
4145 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4146 		DRM_WARN("smart shift update failed\n");
4147 
4148 	drm_kms_helper_poll_disable(dev);
4149 
4150 	if (fbcon)
4151 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4152 
4153 	cancel_delayed_work_sync(&adev->delayed_init_work);
4154 
4155 	amdgpu_ras_suspend(adev);
4156 
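	/*
	 * Phase 1 suspends the display IP blocks; the remaining blocks are
	 * suspended in phase 2 below, after eviction and fence teardown.
	 */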
4157 	amdgpu_device_ip_suspend_phase1(adev);
4158 
4159 	if (!adev->in_s0ix)
4160 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4161 
4162 	r = amdgpu_device_evict_resources(adev);
4163 	if (r)
4164 		return r;
4165 
4166 	amdgpu_fence_driver_hw_fini(adev);
4167 
4168 	amdgpu_device_ip_suspend_phase2(adev);
4169 
4170 	if (amdgpu_sriov_vf(adev))
4171 		amdgpu_virt_release_full_gpu(adev, false);
4172 
4173 	return 0;
4174 }
4175 
4176 /**
4177  * amdgpu_device_resume - initiate device resume
4178  *
4179  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
4181  *
4182  * Bring the hw back to operating state (all asics).
4183  * Returns 0 for success or an error on failure.
4184  * Called at driver resume.
4185  */
4186 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4187 {
4188 	struct amdgpu_device *adev = drm_to_adev(dev);
4189 	int r = 0;
4190 
4191 	if (amdgpu_sriov_vf(adev)) {
4192 		r = amdgpu_virt_request_full_gpu(adev, true);
4193 		if (r)
4194 			return r;
4195 	}
4196 
4197 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4198 		return 0;
4199 
4200 	if (adev->in_s0ix)
4201 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4202 
4203 	/* post card */
4204 	if (amdgpu_device_need_post(adev)) {
4205 		r = amdgpu_device_asic_init(adev);
4206 		if (r)
4207 			dev_err(adev->dev, "amdgpu asic init failed\n");
4208 	}
4209 
4210 	r = amdgpu_device_ip_resume(adev);
4211 
4212 	if (r) {
4213 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4214 		goto exit;
4215 	}
4216 	amdgpu_fence_driver_hw_init(adev);
4217 
4218 	r = amdgpu_device_ip_late_init(adev);
4219 	if (r)
4220 		goto exit;
4221 
4222 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4223 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4224 
4225 	if (!adev->in_s0ix) {
4226 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4227 		if (r)
4228 			goto exit;
4229 	}
4230 
4231 exit:
4232 	if (amdgpu_sriov_vf(adev)) {
4233 		amdgpu_virt_init_data_exchange(adev);
4234 		amdgpu_virt_release_full_gpu(adev, true);
4235 	}
4236 
4237 	if (r)
4238 		return r;
4239 
4240 	/* Make sure IB tests flushed */
4241 	flush_delayed_work(&adev->delayed_init_work);
4242 
4243 	if (fbcon)
4244 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4245 
4246 	drm_kms_helper_poll_enable(dev);
4247 
4248 	amdgpu_ras_resume(adev);
4249 
4250 	if (adev->mode_info.num_crtc) {
4251 		/*
4252 		 * Most of the connector probing functions try to acquire runtime pm
4253 		 * refs to ensure that the GPU is powered on when connector polling is
4254 		 * performed. Since we're calling this from a runtime PM callback,
4255 		 * trying to acquire rpm refs will cause us to deadlock.
4256 		 *
4257 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4258 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4259 		 */
4260 #ifdef CONFIG_PM
4261 		dev->dev->power.disable_depth++;
4262 #endif
4263 		if (!adev->dc_enabled)
4264 			drm_helper_hpd_irq_event(dev);
4265 		else
4266 			drm_kms_helper_hotplug_event(dev);
4267 #ifdef CONFIG_PM
4268 		dev->dev->power.disable_depth--;
4269 #endif
4270 	}
4271 	adev->in_suspend = false;
4272 
4273 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4274 		DRM_WARN("smart shift update failed\n");
4275 
4276 	return 0;
4277 }
4278 
4279 /**
4280  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4281  *
4282  * @adev: amdgpu_device pointer
4283  *
4284  * The list of all the hardware IPs that make up the asic is walked and
4285  * the check_soft_reset callbacks are run.  check_soft_reset determines
4286  * if the asic is still hung or not.
4287  * Returns true if any of the IPs are still in a hung state, false if not.
4288  */
4289 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4290 {
4291 	int i;
4292 	bool asic_hang = false;
4293 
4294 	if (amdgpu_sriov_vf(adev))
4295 		return true;
4296 
4297 	if (amdgpu_asic_need_full_reset(adev))
4298 		return true;
4299 
4300 	for (i = 0; i < adev->num_ip_blocks; i++) {
4301 		if (!adev->ip_blocks[i].status.valid)
4302 			continue;
4303 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4304 			adev->ip_blocks[i].status.hang =
4305 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4306 		if (adev->ip_blocks[i].status.hang) {
4307 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4308 			asic_hang = true;
4309 		}
4310 	}
4311 	return asic_hang;
4312 }
4313 
4314 /**
4315  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4316  *
4317  * @adev: amdgpu_device pointer
4318  *
4319  * The list of all the hardware IPs that make up the asic is walked and the
4320  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4321  * handles any IP specific hardware or software state changes that are
4322  * necessary for a soft reset to succeed.
4323  * Returns 0 on success, negative error code on failure.
4324  */
4325 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4326 {
4327 	int i, r = 0;
4328 
4329 	for (i = 0; i < adev->num_ip_blocks; i++) {
4330 		if (!adev->ip_blocks[i].status.valid)
4331 			continue;
4332 		if (adev->ip_blocks[i].status.hang &&
4333 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4334 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4335 			if (r)
4336 				return r;
4337 		}
4338 	}
4339 
4340 	return 0;
4341 }
4342 
4343 /**
4344  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4345  *
4346  * @adev: amdgpu_device pointer
4347  *
4348  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4349  * reset is necessary to recover.
4350  * Returns true if a full asic reset is required, false if not.
4351  */
4352 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4353 {
4354 	int i;
4355 
4356 	if (amdgpu_asic_need_full_reset(adev))
4357 		return true;
4358 
4359 	for (i = 0; i < adev->num_ip_blocks; i++) {
4360 		if (!adev->ip_blocks[i].status.valid)
4361 			continue;
4362 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4363 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4364 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4365 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4366 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4367 			if (adev->ip_blocks[i].status.hang) {
				dev_info(adev->dev, "Some blocks need a full reset!\n");
4369 				return true;
4370 			}
4371 		}
4372 	}
4373 	return false;
4374 }
4375 
4376 /**
4377  * amdgpu_device_ip_soft_reset - do a soft reset
4378  *
4379  * @adev: amdgpu_device pointer
4380  *
4381  * The list of all the hardware IPs that make up the asic is walked and the
4382  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4383  * IP specific hardware or software state changes that are necessary to soft
4384  * reset the IP.
4385  * Returns 0 on success, negative error code on failure.
4386  */
4387 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4388 {
4389 	int i, r = 0;
4390 
4391 	for (i = 0; i < adev->num_ip_blocks; i++) {
4392 		if (!adev->ip_blocks[i].status.valid)
4393 			continue;
4394 		if (adev->ip_blocks[i].status.hang &&
4395 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4396 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4397 			if (r)
4398 				return r;
4399 		}
4400 	}
4401 
4402 	return 0;
4403 }
4404 
4405 /**
4406  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4407  *
4408  * @adev: amdgpu_device pointer
4409  *
4410  * The list of all the hardware IPs that make up the asic is walked and the
4411  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4412  * handles any IP specific hardware or software state changes that are
4413  * necessary after the IP has been soft reset.
4414  * Returns 0 on success, negative error code on failure.
4415  */
4416 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4417 {
4418 	int i, r = 0;
4419 
4420 	for (i = 0; i < adev->num_ip_blocks; i++) {
4421 		if (!adev->ip_blocks[i].status.valid)
4422 			continue;
4423 		if (adev->ip_blocks[i].status.hang &&
4424 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4425 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4426 		if (r)
4427 			return r;
4428 	}
4429 
4430 	return 0;
4431 }
4432 
4433 /**
4434  * amdgpu_device_recover_vram - Recover some VRAM contents
4435  *
4436  * @adev: amdgpu_device pointer
4437  *
4438  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4439  * restore things like GPUVM page tables after a GPU reset where
4440  * the contents of VRAM might be lost.
4441  *
4442  * Returns:
4443  * 0 on success, negative error code on failure.
4444  */
4445 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4446 {
4447 	struct dma_fence *fence = NULL, *next = NULL;
4448 	struct amdgpu_bo *shadow;
4449 	struct amdgpu_bo_vm *vmbo;
4450 	long r = 1, tmo;
4451 
4452 	if (amdgpu_sriov_runtime(adev))
4453 		tmo = msecs_to_jiffies(8000);
4454 	else
4455 		tmo = msecs_to_jiffies(100);
4456 
4457 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4458 	mutex_lock(&adev->shadow_list_lock);
4459 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4460 		shadow = &vmbo->bo;
4461 		/* No need to recover an evicted BO */
4462 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4463 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4464 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4465 			continue;
4466 
4467 		r = amdgpu_bo_restore_shadow(shadow, &next);
4468 		if (r)
4469 			break;
4470 
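		/*
		 * Pipeline the restores: the copy kicked off above runs
		 * asynchronously while we wait on the previous buffer's fence.
		 */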
4471 		if (fence) {
4472 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4473 			dma_fence_put(fence);
4474 			fence = next;
4475 			if (tmo == 0) {
4476 				r = -ETIMEDOUT;
4477 				break;
4478 			} else if (tmo < 0) {
4479 				r = tmo;
4480 				break;
4481 			}
4482 		} else {
4483 			fence = next;
4484 		}
4485 	}
4486 	mutex_unlock(&adev->shadow_list_lock);
4487 
4488 	if (fence)
4489 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4490 	dma_fence_put(fence);
4491 
4492 	if (r < 0 || tmo <= 0) {
4493 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4494 		return -EIO;
4495 	}
4496 
4497 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4498 	return 0;
4499 }
4500 
4501 
4502 /**
4503  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4504  *
4505  * @adev: amdgpu_device pointer
4506  * @from_hypervisor: request from hypervisor
4507  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code otherwise.
4510  */
4511 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4512 				     bool from_hypervisor)
4513 {
4514 	int r;
4515 	struct amdgpu_hive_info *hive = NULL;
4516 	int retry_limit = 0;
4517 
4518 retry:
4519 	amdgpu_amdkfd_pre_reset(adev);
4520 
4521 	if (from_hypervisor)
4522 		r = amdgpu_virt_request_full_gpu(adev, true);
4523 	else
4524 		r = amdgpu_virt_reset_gpu(adev);
4525 	if (r)
4526 		return r;
4527 
4528 	/* Resume IP prior to SMC */
4529 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4530 	if (r)
4531 		goto error;
4532 
4533 	amdgpu_virt_init_data_exchange(adev);
4534 
4535 	r = amdgpu_device_fw_loading(adev);
4536 	if (r)
4537 		return r;
4538 
4539 	/* now we are okay to resume SMC/CP/SDMA */
4540 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4541 	if (r)
4542 		goto error;
4543 
4544 	hive = amdgpu_get_xgmi_hive(adev);
4545 	/* Update PSP FW topology after reset */
4546 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4547 		r = amdgpu_xgmi_update_topology(hive, adev);
4548 
4549 	if (hive)
4550 		amdgpu_put_xgmi_hive(hive);
4551 
4552 	if (!r) {
4553 		amdgpu_irq_gpu_reset_resume_helper(adev);
4554 		r = amdgpu_ib_ring_tests(adev);
4555 
4556 		amdgpu_amdkfd_post_reset(adev);
4557 	}
4558 
4559 error:
4560 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4561 		amdgpu_inc_vram_lost(adev);
4562 		r = amdgpu_device_recover_vram(adev);
4563 	}
4564 	amdgpu_virt_release_full_gpu(adev, true);
4565 
4566 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4567 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4568 			retry_limit++;
4569 			goto retry;
4570 		} else
4571 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4572 	}
4573 
4574 	return r;
4575 }
4576 
4577 /**
 * amdgpu_device_has_job_running - check if there is any job in the pending list
4579  *
4580  * @adev: amdgpu_device pointer
4581  *
 * Check if there is any job in the scheduler pending list
4583  */
4584 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4585 {
4586 	int i;
4587 	struct drm_sched_job *job;
4588 
4589 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4590 		struct amdgpu_ring *ring = adev->rings[i];
4591 
4592 		if (!ring || !ring->sched.thread)
4593 			continue;
4594 
4595 		spin_lock(&ring->sched.job_list_lock);
4596 		job = list_first_entry_or_null(&ring->sched.pending_list,
4597 					       struct drm_sched_job, list);
4598 		spin_unlock(&ring->sched.job_list_lock);
4599 		if (job)
4600 			return true;
4601 	}
4602 	return false;
4603 }
4604 
4605 /**
4606  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4607  *
4608  * @adev: amdgpu_device pointer
4609  *
4610  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4611  * a hung GPU.
4612  */
4613 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4614 {
4615 
4616 	if (amdgpu_gpu_recovery == 0)
4617 		goto disabled;
4618 
4619 	/* Skip soft reset check in fatal error mode */
4620 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4621 		return true;
4622 
4623 	if (amdgpu_sriov_vf(adev))
4624 		return true;
4625 
4626 	if (amdgpu_gpu_recovery == -1) {
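		/* in auto mode (-1), recovery stays disabled on the ASICs below */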
4627 		switch (adev->asic_type) {
4628 #ifdef CONFIG_DRM_AMDGPU_SI
4629 		case CHIP_VERDE:
4630 		case CHIP_TAHITI:
4631 		case CHIP_PITCAIRN:
4632 		case CHIP_OLAND:
4633 		case CHIP_HAINAN:
4634 #endif
4635 #ifdef CONFIG_DRM_AMDGPU_CIK
4636 		case CHIP_KAVERI:
4637 		case CHIP_KABINI:
4638 		case CHIP_MULLINS:
4639 #endif
4640 		case CHIP_CARRIZO:
4641 		case CHIP_STONEY:
4642 		case CHIP_CYAN_SKILLFISH:
4643 			goto disabled;
4644 		default:
4645 			break;
4646 		}
4647 	}
4648 
4649 	return true;
4650 
4651 disabled:
	dev_info(adev->dev, "GPU recovery disabled.\n");
	return false;
4654 }
4655 
4656 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4657 {
	u32 i;
	int ret = 0;

	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* disable BM */
	pci_clear_master(adev->pdev);

	amdgpu_device_cache_pci_state(adev->pdev);

	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		dev_err(adev->dev, "GPU mode1 reset failed\n");

	amdgpu_device_load_pci_state(adev->pdev);

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
	return ret;
4694 }
4695 
4696 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4697 				 struct amdgpu_reset_context *reset_context)
4698 {
4699 	int i, r = 0;
4700 	struct amdgpu_job *job = NULL;
4701 	bool need_full_reset =
4702 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4703 
4704 	if (reset_context->reset_req_dev == adev)
4705 		job = reset_context->job;
4706 
4707 	if (amdgpu_sriov_vf(adev)) {
4708 		/* stop the data exchange thread */
4709 		amdgpu_virt_fini_data_exchange(adev);
4710 	}
4711 
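	/*
	 * Pause fence processing in the interrupt handler while job fences
	 * are cleared and force completed below; re-enabled afterwards.
	 */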
4712 	amdgpu_fence_driver_isr_toggle(adev, true);
4713 
4714 	/* block all schedulers and reset given job's ring */
4715 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4716 		struct amdgpu_ring *ring = adev->rings[i];
4717 
4718 		if (!ring || !ring->sched.thread)
4719 			continue;
4720 
		/* Clear job fences from the fence driver to avoid
		 * force_completion on them; leave the NULL and VM flush
		 * fences in the fence driver.
		 */
4723 		amdgpu_fence_driver_clear_job_fences(ring);
4724 
4725 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4726 		amdgpu_fence_driver_force_completion(ring);
4727 	}
4728 
4729 	amdgpu_fence_driver_isr_toggle(adev, false);
4730 
4731 	if (job && job->vm)
4732 		drm_sched_increase_karma(&job->base);
4733 
4734 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4735 	/* If reset handler not implemented, continue; otherwise return */
4736 	if (r == -ENOSYS)
4737 		r = 0;
4738 	else
4739 		return r;
4740 
4741 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4742 	if (!amdgpu_sriov_vf(adev)) {
4743 
4744 		if (!need_full_reset)
4745 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4746 
4747 		if (!need_full_reset && amdgpu_gpu_recovery &&
4748 		    amdgpu_device_ip_check_soft_reset(adev)) {
4749 			amdgpu_device_ip_pre_soft_reset(adev);
4750 			r = amdgpu_device_ip_soft_reset(adev);
4751 			amdgpu_device_ip_post_soft_reset(adev);
4752 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4753 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4754 				need_full_reset = true;
4755 			}
4756 		}
4757 
		if (need_full_reset) {
			r = amdgpu_device_ip_suspend(adev);
			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
		} else {
			clear_bit(AMDGPU_NEED_FULL_RESET,
				  &reset_context->flags);
		}
4765 	}
4766 
4767 	return r;
4768 }
4769 
4770 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4771 {
4772 	int i;
4773 
4774 	lockdep_assert_held(&adev->reset_domain->sem);
4775 
4776 	for (i = 0; i < adev->num_regs; i++) {
4777 		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4778 		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4779 					     adev->reset_dump_reg_value[i]);
4780 	}
4781 
4782 	return 0;
4783 }
4784 
4785 #ifdef CONFIG_DEV_COREDUMP
4786 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4787 		size_t count, void *data, size_t datalen)
4788 {
4789 	struct drm_printer p;
4790 	struct amdgpu_device *adev = data;
4791 	struct drm_print_iterator iter;
4792 	int i;
4793 
4794 	iter.data = buffer;
4795 	iter.offset = 0;
4796 	iter.start = offset;
4797 	iter.remain = count;
4798 
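	/* the coredump printer clips its output to the offset/count window requested above */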
4799 	p = drm_coredump_printer(&iter);
4800 
4801 	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4802 	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4803 	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4804 	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4805 	if (adev->reset_task_info.pid)
4806 		drm_printf(&p, "process_name: %s PID: %d\n",
4807 			   adev->reset_task_info.process_name,
4808 			   adev->reset_task_info.pid);
4809 
4810 	if (adev->reset_vram_lost)
4811 		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4812 	if (adev->num_regs) {
4813 		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
4814 
4815 		for (i = 0; i < adev->num_regs; i++)
4816 			drm_printf(&p, "0x%08x: 0x%08x\n",
4817 				   adev->reset_dump_reg_list[i],
4818 				   adev->reset_dump_reg_value[i]);
4819 	}
4820 
4821 	return count - iter.remain;
4822 }
4823 
4824 static void amdgpu_devcoredump_free(void *data)
4825 {
4826 }
4827 
4828 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4829 {
4830 	struct drm_device *dev = adev_to_drm(adev);
4831 
4832 	ktime_get_ts64(&adev->reset_time);
4833 	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4834 		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4835 }
4836 #endif
4837 
4838 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4839 			 struct amdgpu_reset_context *reset_context)
4840 {
4841 	struct amdgpu_device *tmp_adev = NULL;
4842 	bool need_full_reset, skip_hw_reset, vram_lost = false;
4843 	int r = 0;
	bool gpu_reset_for_dev_remove = false;
4845 
4846 	/* Try reset handler method first */
4847 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4848 				    reset_list);
4849 	amdgpu_reset_reg_dumps(tmp_adev);
4850 
4851 	reset_context->reset_device_list = device_list_handle;
4852 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4853 	/* If reset handler not implemented, continue; otherwise return */
4854 	if (r == -ENOSYS)
4855 		r = 0;
4856 	else
4857 		return r;
4858 
4859 	/* Reset handler not implemented, use the default method */
4860 	need_full_reset =
4861 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4862 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4863 
4864 	gpu_reset_for_dev_remove =
4865 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4866 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4867 
4868 	/*
4869 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4870 	 * to allow proper links negotiation in FW (within 1 sec)
4871 	 */
4872 	if (!skip_hw_reset && need_full_reset) {
4873 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4874 			/* For XGMI run all resets in parallel to speed up the process */
4875 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4876 				tmp_adev->gmc.xgmi.pending_reset = false;
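				/* queue_work() returning false means the reset
				 * work is already pending for this node
				 */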
4877 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4878 					r = -EALREADY;
4879 			} else
4880 				r = amdgpu_asic_reset(tmp_adev);
4881 
4882 			if (r) {
4883 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4884 					 r, adev_to_drm(tmp_adev)->unique);
4885 				break;
4886 			}
4887 		}
4888 
4889 		/* For XGMI wait for all resets to complete before proceed */
4890 		if (!r) {
4891 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4892 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4893 					flush_work(&tmp_adev->xgmi_reset_work);
4894 					r = tmp_adev->asic_reset_res;
4895 					if (r)
4896 						break;
4897 				}
4898 			}
4899 		}
4900 	}
4901 
4902 	if (!r && amdgpu_ras_intr_triggered()) {
4903 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4904 			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4905 			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4906 				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4907 		}
4908 
4909 		amdgpu_ras_intr_cleared();
4910 	}
4911 
4912 	/* Since the mode1 reset affects base ip blocks, the
4913 	 * phase1 ip blocks need to be resumed. Otherwise there
4914 	 * will be a BIOS signature error and the psp bootloader
4915 	 * can't load kdb on the next amdgpu install.
4916 	 */
4917 	if (gpu_reset_for_dev_remove) {
4918 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4919 			amdgpu_device_ip_resume_phase1(tmp_adev);
4920 
4921 		goto end;
4922 	}
4923 
4924 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4925 		if (need_full_reset) {
4926 			/* post card */
4927 			r = amdgpu_device_asic_init(tmp_adev);
4928 			if (r) {
4929 				dev_warn(tmp_adev->dev, "asic atom init failed!");
4930 			} else {
4931 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4932 				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4933 				if (r)
4934 					goto out;
4935 
4936 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4937 				if (r)
4938 					goto out;
4939 
4940 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4941 #ifdef CONFIG_DEV_COREDUMP
4942 				tmp_adev->reset_vram_lost = vram_lost;
4943 				memset(&tmp_adev->reset_task_info, 0,
4944 						sizeof(tmp_adev->reset_task_info));
4945 				if (reset_context->job && reset_context->job->vm)
4946 					tmp_adev->reset_task_info =
4947 						reset_context->job->vm->task_info;
4948 				amdgpu_reset_capture_coredumpm(tmp_adev);
4949 #endif
4950 				if (vram_lost) {
4951 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4952 					amdgpu_inc_vram_lost(tmp_adev);
4953 				}
4954 
4955 				r = amdgpu_device_fw_loading(tmp_adev);
4956 				if (r)
4957 					return r;
4958 
4959 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4960 				if (r)
4961 					goto out;
4962 
4963 				if (vram_lost)
4964 					amdgpu_device_fill_reset_magic(tmp_adev);
4965 
4966 				/*
4967 				 * Add this ASIC as tracked as reset was already
4968 				 * complete successfully.
4969 				 */
4970 				amdgpu_register_gpu_instance(tmp_adev);
4971 
4972 				if (!reset_context->hive &&
4973 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4974 					amdgpu_xgmi_add_device(tmp_adev);
4975 
4976 				r = amdgpu_device_ip_late_init(tmp_adev);
4977 				if (r)
4978 					goto out;
4979 
4980 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4981 
4982 				/*
				 * The GPU enters a bad state once the number of
				 * faulty pages flagged by ECC reaches the
				 * threshold, and RAS recovery is scheduled next.
				 * So check here and break recovery if the bad
				 * page threshold has indeed been exceeded, and
				 * remind the user to either retire this GPU or
				 * set a bigger bad_page_threshold value when
				 * probing the driver again.
4991 				 */
4992 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4993 					/* must succeed. */
4994 					amdgpu_ras_resume(tmp_adev);
4995 				} else {
4996 					r = -EINVAL;
4997 					goto out;
4998 				}
4999 
5000 				/* Update PSP FW topology after reset */
5001 				if (reset_context->hive &&
5002 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5003 					r = amdgpu_xgmi_update_topology(
5004 						reset_context->hive, tmp_adev);
5005 			}
5006 		}
5007 
5008 out:
5009 		if (!r) {
5010 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5011 			r = amdgpu_ib_ring_tests(tmp_adev);
5012 			if (r) {
5013 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5014 				need_full_reset = true;
5015 				r = -EAGAIN;
5016 				goto end;
5017 			}
5018 		}
5019 
5020 		if (!r)
5021 			r = amdgpu_device_recover_vram(tmp_adev);
5022 		else
5023 			tmp_adev->asic_reset_res = r;
5024 	}
5025 
5026 end:
5027 	if (need_full_reset)
5028 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5029 	else
5030 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5031 	return r;
5032 }
5033 
5034 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5035 {
5036 
5037 	switch (amdgpu_asic_reset_method(adev)) {
5038 	case AMD_RESET_METHOD_MODE1:
5039 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5040 		break;
5041 	case AMD_RESET_METHOD_MODE2:
5042 		adev->mp1_state = PP_MP1_STATE_RESET;
5043 		break;
5044 	default:
5045 		adev->mp1_state = PP_MP1_STATE_NONE;
5046 		break;
5047 	}
5048 }
5049 
5050 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5051 {
5052 	amdgpu_vf_error_trans_all(adev);
5053 	adev->mp1_state = PP_MP1_STATE_NONE;
5054 }
5055 
5056 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5057 {
5058 	struct pci_dev *p = NULL;
5059 
5060 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5061 			adev->pdev->bus->number, 1);
5062 	if (p) {
5063 		pm_runtime_enable(&(p->dev));
5064 		pm_runtime_resume(&(p->dev));
5065 	}
5066 
5067 	pci_dev_put(p);
5068 }
5069 
5070 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5071 {
5072 	enum amd_reset_method reset_method;
5073 	struct pci_dev *p = NULL;
5074 	u64 expires;
5075 
5076 	/*
	 * For now, only BACO and mode1 reset are confirmed to suffer
	 * the audio issue if the audio device is not properly suspended.
5079 	 */
5080 	reset_method = amdgpu_asic_reset_method(adev);
5081 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5082 	     (reset_method != AMD_RESET_METHOD_MODE1))
5083 		return -EINVAL;
5084 
5085 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5086 			adev->pdev->bus->number, 1);
5087 	if (!p)
5088 		return -ENODEV;
5089 
5090 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5091 	if (!expires)
5092 		/*
5093 		 * If we cannot get the audio device autosuspend delay,
		 * If we cannot get the audio device autosuspend delay,
		 * fall back to a fixed 4s interval. Since 3s is the audio
		 * controller's default autosuspend delay, 4s is guaranteed
		 * to cover it.
5098 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5099 
5100 	while (!pm_runtime_status_suspended(&(p->dev))) {
5101 		if (!pm_runtime_suspend(&(p->dev)))
5102 			break;
5103 
5104 		if (expires < ktime_get_mono_fast_ns()) {
5105 			dev_warn(adev->dev, "failed to suspend display audio\n");
5106 			pci_dev_put(p);
5107 			/* TODO: abort the succeeding gpu reset? */
5108 			return -ETIMEDOUT;
5109 		}
5110 	}
5111 
5112 	pm_runtime_disable(&(p->dev));
5113 
5114 	pci_dev_put(p);
5115 	return 0;
5116 }
5117 
5118 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5119 {
5120 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5121 
5122 #if defined(CONFIG_DEBUG_FS)
5123 	if (!amdgpu_sriov_vf(adev))
5124 		cancel_work(&adev->reset_work);
5125 #endif
5126 
5127 	if (adev->kfd.dev)
5128 		cancel_work(&adev->kfd.reset_work);
5129 
5130 	if (amdgpu_sriov_vf(adev))
5131 		cancel_work(&adev->virt.flr_work);
5132 
5133 	if (con && adev->ras_enabled)
5134 		cancel_work(&con->recovery_work);
5135 
5136 }
5137 
5138 /**
5139  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5140  *
5141  * @adev: amdgpu_device pointer
5142  * @job: which job trigger hang
5143  *
5144  * Attempt to reset the GPU if it has hung (all asics).
5145  * Attempt to do soft-reset or full-reset and reinitialize Asic
5146  * Returns 0 for success or an error on failure.
5147  */
5148 
5149 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5150 			      struct amdgpu_job *job,
5151 			      struct amdgpu_reset_context *reset_context)
5152 {
5153 	struct list_head device_list, *device_list_handle =  NULL;
5154 	bool job_signaled = false;
5155 	struct amdgpu_hive_info *hive = NULL;
5156 	struct amdgpu_device *tmp_adev = NULL;
5157 	int i, r = 0;
5158 	bool need_emergency_restart = false;
5159 	bool audio_suspended = false;
5160 	bool gpu_reset_for_dev_remove = false;
5161 
5162 	gpu_reset_for_dev_remove =
5163 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5164 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5165 
5166 	/*
5167 	 * Special case: RAS triggered and full reset isn't supported
5168 	 */
5169 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5170 
5171 	/*
5172 	 * Flush RAM to disk so that after reboot
5173 	 * the user can read log and see why the system rebooted.
5174 	 */
5175 	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5176 		DRM_WARN("Emergency reboot.");
5177 
5178 		ksys_sync_helper();
5179 		emergency_restart();
5180 	}
5181 
5182 	dev_info(adev->dev, "GPU %s begin!\n",
		need_emergency_restart ? "jobs stop" : "reset");
5184 
5185 	if (!amdgpu_sriov_vf(adev))
5186 		hive = amdgpu_get_xgmi_hive(adev);
5187 	if (hive)
5188 		mutex_lock(&hive->hive_lock);
5189 
5190 	reset_context->job = job;
5191 	reset_context->hive = hive;
5192 	/*
5193 	 * Build list of devices to reset.
5194 	 * In case we are in XGMI hive mode, resort the device list
5195 	 * to put adev in the 1st position.
5196 	 */
5197 	INIT_LIST_HEAD(&device_list);
5198 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5199 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5200 			list_add_tail(&tmp_adev->reset_list, &device_list);
5201 			if (gpu_reset_for_dev_remove && adev->shutdown)
5202 				tmp_adev->shutdown = true;
5203 		}
5204 		if (!list_is_first(&adev->reset_list, &device_list))
5205 			list_rotate_to_front(&adev->reset_list, &device_list);
5206 		device_list_handle = &device_list;
5207 	} else {
5208 		list_add_tail(&adev->reset_list, &device_list);
5209 		device_list_handle = &device_list;
5210 	}
5211 
5212 	/* We need to lock reset domain only once both for XGMI and single device */
5213 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5214 				    reset_list);
5215 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5216 
5217 	/* block all schedulers and reset given job's ring */
5218 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5219 
5220 		amdgpu_device_set_mp1_state(tmp_adev);
5221 
5222 		/*
		 * Try to put the audio codec into suspend state
		 * before the gpu reset starts.
		 *
		 * The power domain of the graphics device is shared
		 * with the AZ power domain. Without this, we may
		 * change the audio hardware from behind the audio
		 * driver's back and trigger audio codec errors.
5231 		 */
5232 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5233 			audio_suspended = true;
5234 
5235 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5236 
5237 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5238 
5239 		if (!amdgpu_sriov_vf(tmp_adev))
5240 			amdgpu_amdkfd_pre_reset(tmp_adev);
5241 
5242 		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset completes.
5245 		 */
5246 		amdgpu_unregister_gpu_instance(tmp_adev);
5247 
5248 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5249 
5250 		/* disable ras on ALL IPs */
5251 		if (!need_emergency_restart &&
5252 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5253 			amdgpu_ras_suspend(tmp_adev);
5254 
5255 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5256 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5257 
5258 			if (!ring || !ring->sched.thread)
5259 				continue;
5260 
5261 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5262 
5263 			if (need_emergency_restart)
5264 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5265 		}
5266 		atomic_inc(&tmp_adev->gpu_reset_counter);
5267 	}
5268 
5269 	if (need_emergency_restart)
5270 		goto skip_sched_resume;
5271 
5272 	/*
5273 	 * Must check guilty signal here since after this point all old
5274 	 * HW fences are force signaled.
5275 	 *
5276 	 * job->base holds a reference to parent fence
5277 	 */
5278 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5279 		job_signaled = true;
5280 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5281 		goto skip_hw_reset;
5282 	}
5283 
5284 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5285 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5286 		if (gpu_reset_for_dev_remove) {
			/* Workaround for ASICs that need to disable SMC first */
5288 			amdgpu_device_smu_fini_early(tmp_adev);
5289 		}
5290 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/* TODO: Should we stop? */
5292 		if (r) {
5293 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5294 				  r, adev_to_drm(tmp_adev)->unique);
5295 			tmp_adev->asic_reset_res = r;
5296 		}
5297 
5298 		/*
		 * Drop all pending non-scheduler resets. Scheduler resets
5300 		 * were already dropped during drm_sched_stop
5301 		 */
5302 		amdgpu_device_stop_pending_resets(tmp_adev);
5303 	}
5304 
5305 	/* Actual ASIC resets if needed. */
5306 	/* Host driver will handle XGMI hive reset for SRIOV */
5307 	if (amdgpu_sriov_vf(adev)) {
5308 		r = amdgpu_device_reset_sriov(adev, !job);
5309 		if (r)
5310 			adev->asic_reset_res = r;
5311 
5312 		/* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
5313 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5314 			amdgpu_ras_resume(adev);
5315 	} else {
5316 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5317 		if (r == -EAGAIN)
5318 			goto retry;
5319 
5320 		if (!r && gpu_reset_for_dev_remove)
5321 			goto recover_end;
5322 	}
5323 
5324 skip_hw_reset:
5325 
5326 	/* Post ASIC reset for all devs. */
5327 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5328 
5329 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5330 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5331 
5332 			if (!ring || !ring->sched.thread)
5333 				continue;
5334 
5335 			drm_sched_start(&ring->sched, true);
5336 		}
5337 
5338 		if (tmp_adev->enable_mes && tmp_adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5339 			amdgpu_mes_self_test(tmp_adev);
5340 
5341 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5342 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5343 		}
5344 
5345 		if (tmp_adev->asic_reset_res)
5346 			r = tmp_adev->asic_reset_res;
5347 
5348 		tmp_adev->asic_reset_res = 0;
5349 
5350 		if (r) {
5351 			/* bad news, how do we tell it to userspace? */
5352 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5353 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5354 		} else {
5355 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5356 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5357 				DRM_WARN("smart shift update failed\n");
5358 		}
5359 	}
5360 
5361 skip_sched_resume:
5362 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5363 		/* unlock kfd: SRIOV would do it separately */
5364 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5365 			amdgpu_amdkfd_post_reset(tmp_adev);
5366 
5367 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5368 		 * so we need to bring up kfd here if it was not initialized before
5369 		 */
5370 		if (!tmp_adev->kfd.init_complete)
5371 			amdgpu_amdkfd_device_init(tmp_adev);
5372 
5373 		if (audio_suspended)
5374 			amdgpu_device_resume_display_audio(tmp_adev);
5375 
5376 		amdgpu_device_unset_mp1_state(tmp_adev);
5377 
5378 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5379 	}
5380 
5381 recover_end:
5382 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5383 					    reset_list);
5384 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5385 
5386 	if (hive) {
5387 		mutex_unlock(&hive->hive_lock);
5388 		amdgpu_put_xgmi_hive(hive);
5389 	}
5390 
5391 	if (r)
5392 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5393 
5394 	atomic_set(&adev->reset_domain->reset_res, r);
5395 	return r;
5396 }
5397 
5398 /**
5399  * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5400  *
5401  * @adev: amdgpu_device pointer
5402  *
5403  * Fetches and stores in the driver the PCIe capabilities (gen speed
5404  * and lanes) of the slot the device is in. Handles APUs and
5405  * virtualized environments where PCIe config space may not be available.
5406  */
5407 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5408 {
5409 	struct pci_dev *pdev;
5410 	enum pci_bus_speed speed_cap, platform_speed_cap;
5411 	enum pcie_link_width platform_link_width;
5412 
5413 	if (amdgpu_pcie_gen_cap)
5414 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5415 
5416 	if (amdgpu_pcie_lane_cap)
5417 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5418 
5419 	/* covers APUs as well */
5420 	if (pci_is_root_bus(adev->pdev->bus)) {
5421 		if (adev->pm.pcie_gen_mask == 0)
5422 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5423 		if (adev->pm.pcie_mlw_mask == 0)
5424 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5425 		return;
5426 	}
5427 
5428 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5429 		return;
5430 
5431 	pcie_bandwidth_available(adev->pdev, NULL,
5432 				 &platform_speed_cap, &platform_link_width);
5433 
5434 	if (adev->pm.pcie_gen_mask == 0) {
5435 		/* asic caps */
5436 		pdev = adev->pdev;
5437 		speed_cap = pcie_get_speed_cap(pdev);
5438 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5439 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5440 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5441 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5442 		} else {
5443 			if (speed_cap == PCIE_SPEED_32_0GT)
5444 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5445 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5446 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5447 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5448 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5449 			else if (speed_cap == PCIE_SPEED_16_0GT)
5450 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5451 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5452 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5453 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5454 			else if (speed_cap == PCIE_SPEED_8_0GT)
5455 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5456 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5457 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5458 			else if (speed_cap == PCIE_SPEED_5_0GT)
5459 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5460 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5461 			else
5462 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5463 		}
5464 		/* platform caps */
5465 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5466 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5467 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5468 		} else {
5469 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5470 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5471 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5472 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5473 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5474 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5475 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5476 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5477 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5478 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5479 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5480 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5481 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5482 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5483 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5484 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5485 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5486 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5487 			else
5488 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5489 
5490 		}
5491 	}
5492 	if (adev->pm.pcie_mlw_mask == 0) {
5493 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5494 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5495 		} else {
5496 			switch (platform_link_width) {
5497 			case PCIE_LNK_X32:
5498 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5499 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5500 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5501 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5502 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5503 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5504 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5505 				break;
5506 			case PCIE_LNK_X16:
5507 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5508 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5509 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5510 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5511 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5512 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5513 				break;
5514 			case PCIE_LNK_X12:
5515 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5516 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5517 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5518 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5519 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5520 				break;
5521 			case PCIE_LNK_X8:
5522 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5523 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5524 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5525 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5526 				break;
5527 			case PCIE_LNK_X4:
5528 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5529 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5530 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5531 				break;
5532 			case PCIE_LNK_X2:
5533 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5534 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5535 				break;
5536 			case PCIE_LNK_X1:
5537 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5538 				break;
5539 			default:
5540 				break;
5541 			}
5542 		}
5543 	}
5544 }
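
/*
 * Usage sketch (illustrative only, not part of the driver): once the masks
 * above are populated, later code can test them instead of touching PCIe
 * config space again. The caller and variable below are hypothetical.
 *
 *	int max_gen = 1;
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
 *		max_gen = 4;
 *	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *		max_gen = 3;
 *	else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
 *		max_gen = 2;
 */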
5545 
5546 /**
5547  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5548  *
5549  * @adev: amdgpu_device pointer
5550  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5551  *
5552  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5553  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5554  * @peer_adev.
5555  */
5556 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5557 				      struct amdgpu_device *peer_adev)
5558 {
5559 #ifdef CONFIG_HSA_AMD_P2P
5560 	uint64_t address_mask = peer_adev->dev->dma_mask ?
5561 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5562 	resource_size_t aper_limit =
5563 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
5564 	bool p2p_access =
5565 		!adev->gmc.xgmi.connected_to_cpu &&
5566 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5567 
5568 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5569 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5570 		!(adev->gmc.aper_base & address_mask ||
5571 		  aper_limit & address_mask));
5572 #else
5573 	return false;
5574 #endif
5575 }
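
/*
 * Usage sketch (illustrative only): a hypothetical caller deciding whether to
 * map peer VRAM over PCIe would check accessibility in both directions before
 * setting up P2P DMA; if either direction fails, it falls back to staging
 * copies through system memory.
 *
 *	bool p2p_ok = amdgpu_device_is_peer_accessible(adev, peer_adev) &&
 *		      amdgpu_device_is_peer_accessible(peer_adev, adev);
 */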
5576 
5577 int amdgpu_device_baco_enter(struct drm_device *dev)
5578 {
5579 	struct amdgpu_device *adev = drm_to_adev(dev);
5580 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5581 
5582 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5583 		return -ENOTSUPP;
5584 
5585 	if (ras && adev->ras_enabled &&
5586 	    adev->nbio.funcs->enable_doorbell_interrupt)
5587 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5588 
5589 	return amdgpu_dpm_baco_enter(adev);
5590 }
5591 
5592 int amdgpu_device_baco_exit(struct drm_device *dev)
5593 {
5594 	struct amdgpu_device *adev = drm_to_adev(dev);
5595 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5596 	int ret = 0;
5597 
5598 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5599 		return -ENOTSUPP;
5600 
5601 	ret = amdgpu_dpm_baco_exit(adev);
5602 	if (ret)
5603 		return ret;
5604 
5605 	if (ras && adev->ras_enabled &&
5606 	    adev->nbio.funcs->enable_doorbell_interrupt)
5607 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5608 
5609 	if (amdgpu_passthrough(adev) &&
5610 	    adev->nbio.funcs->clear_doorbell_interrupt)
5611 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
5612 
5613 	return 0;
5614 }
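
/*
 * Usage sketch (illustrative only): BACO enter/exit are meant to be paired,
 * e.g. around a low-power window driven by runtime PM. The error handling
 * below is a hypothetical caller's, not the driver's.
 *
 *	ret = amdgpu_device_baco_enter(drm_dev);
 *	if (ret)
 *		return ret;
 *	(device sits in BACO here)
 *	ret = amdgpu_device_baco_exit(drm_dev);
 */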
5615 
5616 /**
5617  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5618  * @pdev: PCI device struct
5619  * @state: PCI channel state
5620  *
5621  * Description: Called when a PCI error is detected.
5622  *
5623  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5624  */
5625 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5626 {
5627 	struct drm_device *dev = pci_get_drvdata(pdev);
5628 	struct amdgpu_device *adev = drm_to_adev(dev);
5629 	int i;
5630 
5631 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5632 
5633 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
5634 		DRM_WARN("No support for XGMI hive yet...");
5635 		return PCI_ERS_RESULT_DISCONNECT;
5636 	}
5637 
5638 	adev->pci_channel_state = state;
5639 
5640 	switch (state) {
5641 	case pci_channel_io_normal:
5642 		return PCI_ERS_RESULT_CAN_RECOVER;
5643 	/* Fatal error, prepare for slot reset */
5644 	case pci_channel_io_frozen:
5645 		/*
5646 		 * Locking adev->reset_domain->sem will prevent any external access
5647 		 * to GPU during PCI error recovery
5648 		 */
5649 		amdgpu_device_lock_reset_domain(adev->reset_domain);
5650 		amdgpu_device_set_mp1_state(adev);
5651 
5652 		/*
5653 		 * Block any work scheduling as we do for regular GPU reset
5654 		 * for the duration of the recovery
5655 		 */
5656 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5657 			struct amdgpu_ring *ring = adev->rings[i];
5658 
5659 			if (!ring || !ring->sched.thread)
5660 				continue;
5661 
5662 			drm_sched_stop(&ring->sched, NULL);
5663 		}
5664 		atomic_inc(&adev->gpu_reset_counter);
5665 		return PCI_ERS_RESULT_NEED_RESET;
5666 	case pci_channel_io_perm_failure:
5667 		/* Permanent error, prepare for device removal */
5668 		return PCI_ERS_RESULT_DISCONNECT;
5669 	}
5670 
5671 	return PCI_ERS_RESULT_NEED_RESET;
5672 }
5673 
5674 /**
5675  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5676  * @pdev: pointer to PCI device
5677  */
5678 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5679 {
5680 
5681 	DRM_INFO("PCI error: mmio enabled callback!!\n");
5682 
5683 	/* TODO - dump whatever for debugging purposes */
5684 
5685 	/* This is called only if amdgpu_pci_error_detected returns
5686 	 * PCI_ERS_RESULT_CAN_RECOVER. Reads/writes to the device still
5687 	 * work, so there is no need to reset the slot.
5688 	 */
5689 
5690 	return PCI_ERS_RESULT_RECOVERED;
5691 }
5692 
5693 /**
5694  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5695  * @pdev: PCI device struct
5696  *
5697  * Description: This routine is called by the pci error recovery
5698  * code after the PCI slot has been reset, just before we
5699  * should resume normal operations.
5700  */
5701 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5702 {
5703 	struct drm_device *dev = pci_get_drvdata(pdev);
5704 	struct amdgpu_device *adev = drm_to_adev(dev);
5705 	int r, i;
5706 	struct amdgpu_reset_context reset_context;
5707 	u32 memsize;
5708 	struct list_head device_list;
5709 
5710 	DRM_INFO("PCI error: slot reset callback!!\n");
5711 
5712 	memset(&reset_context, 0, sizeof(reset_context));
5713 
5714 	INIT_LIST_HEAD(&device_list);
5715 	list_add_tail(&adev->reset_list, &device_list);
5716 
5717 	/* wait for asic to come out of reset */
5718 	msleep(500);
5719 
5720 	/* Restore PCI config space */
5721 	amdgpu_device_load_pci_state(pdev);
5722 
5723 	/* confirm ASIC came out of reset */
5724 	for (i = 0; i < adev->usec_timeout; i++) {
5725 		memsize = amdgpu_asic_get_config_memsize(adev);
5726 
5727 		if (memsize != 0xffffffff)
5728 			break;
5729 		udelay(1);
5730 	}
5731 	if (memsize == 0xffffffff) {
5732 		r = -ETIME;
5733 		goto out;
5734 	}
5735 
5736 	reset_context.method = AMD_RESET_METHOD_NONE;
5737 	reset_context.reset_req_dev = adev;
5738 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5739 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5740 
5741 	adev->no_hw_access = true;
5742 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5743 	adev->no_hw_access = false;
5744 	if (r)
5745 		goto out;
5746 
5747 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
5748 
5749 out:
5750 	if (!r) {
5751 		if (amdgpu_device_cache_pci_state(adev->pdev))
5752 			pci_restore_state(adev->pdev);
5753 
5754 		DRM_INFO("PCIe error recovery succeeded\n");
5755 	} else {
5756 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
5757 		amdgpu_device_unset_mp1_state(adev);
5758 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
5759 	}
5760 
5761 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5762 }
5763 
5764 /**
5765  * amdgpu_pci_resume() - resume normal ops after PCI reset
5766  * @pdev: pointer to PCI device
5767  *
5768  * Called when the error recovery driver tells us that it is
5769  * OK to resume normal operation.
5770  */
5771 void amdgpu_pci_resume(struct pci_dev *pdev)
5772 {
5773 	struct drm_device *dev = pci_get_drvdata(pdev);
5774 	struct amdgpu_device *adev = drm_to_adev(dev);
5775 	int i;
5776 
5777 
5778 	DRM_INFO("PCI error: resume callback!!\n");
5779 
5780 	/* Only continue execution for the case of pci_channel_io_frozen */
5781 	if (adev->pci_channel_state != pci_channel_io_frozen)
5782 		return;
5783 
5784 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5785 		struct amdgpu_ring *ring = adev->rings[i];
5786 
5787 		if (!ring || !ring->sched.thread)
5788 			continue;
5789 
5790 		drm_sched_start(&ring->sched, true);
5791 	}
5792 
5793 	amdgpu_device_unset_mp1_state(adev);
5794 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
5795 }
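
/*
 * The four PCI error callbacks above only take effect once they are hooked
 * into the PCI core. A sketch of the wiring (the driver does this in
 * amdgpu_drv.c; the struct name here follows that file but is shown from
 * memory, so treat it as an assumption):
 *
 *	static const struct pci_error_handlers amdgpu_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 *
 * and then .err_handler = &amdgpu_pci_err_handler in the pci_driver struct.
 */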
5796 
5797 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5798 {
5799 	struct drm_device *dev = pci_get_drvdata(pdev);
5800 	struct amdgpu_device *adev = drm_to_adev(dev);
5801 	int r;
5802 
5803 	r = pci_save_state(pdev);
5804 	if (!r) {
5805 		kfree(adev->pci_state);
5806 
5807 		adev->pci_state = pci_store_saved_state(pdev);
5808 
5809 		if (!adev->pci_state) {
5810 			DRM_ERROR("Failed to store PCI saved state");
5811 			return false;
5812 		}
5813 	} else {
5814 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
5815 		return false;
5816 	}
5817 
5818 	return true;
5819 }
5820 
5821 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5822 {
5823 	struct drm_device *dev = pci_get_drvdata(pdev);
5824 	struct amdgpu_device *adev = drm_to_adev(dev);
5825 	int r;
5826 
5827 	if (!adev->pci_state)
5828 		return false;
5829 
5830 	r = pci_load_saved_state(pdev, adev->pci_state);
5831 
5832 	if (!r) {
5833 		pci_restore_state(pdev);
5834 	} else {
5835 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
5836 		return false;
5837 	}
5838 
5839 	return true;
5840 }
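
/*
 * Usage sketch (illustrative only): the two helpers above are meant to be
 * used as a pair across a reset, caching config space while the device is
 * healthy and replaying it afterwards.
 *
 *	amdgpu_device_cache_pci_state(adev->pdev);
 *	(reset the ASIC here)
 *	amdgpu_device_load_pci_state(adev->pdev);
 */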
5841 
5842 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5843 		struct amdgpu_ring *ring)
5844 {
5845 #ifdef CONFIG_X86_64
5846 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5847 		return;
5848 #endif
5849 	if (adev->gmc.xgmi.connected_to_cpu)
5850 		return;
5851 
5852 	if (ring && ring->funcs->emit_hdp_flush)
5853 		amdgpu_ring_emit_hdp_flush(ring);
5854 	else
5855 		amdgpu_asic_flush_hdp(adev, ring);
5856 }
5857 
5858 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5859 		struct amdgpu_ring *ring)
5860 {
5861 #ifdef CONFIG_X86_64
5862 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5863 		return;
5864 #endif
5865 	if (adev->gmc.xgmi.connected_to_cpu)
5866 		return;
5867 
5868 	amdgpu_asic_invalidate_hdp(adev, ring);
5869 }
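
/*
 * Usage sketch (illustrative only, semantics hedged): a typical pattern is to
 * flush the HDP cache after the CPU writes to VRAM through the BAR, so the
 * GPU sees the data, and to invalidate it before the CPU reads back data the
 * GPU produced. The buffer names and sizes below are hypothetical.
 *
 *	memcpy_toio(vram_cpu_addr, src, size);
 *	amdgpu_device_flush_hdp(adev, ring);
 *
 *	amdgpu_device_invalidate_hdp(adev, ring);
 *	memcpy_fromio(dst, vram_cpu_addr, size);
 */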
5870 
5871 int amdgpu_in_reset(struct amdgpu_device *adev)
5872 {
5873 	return atomic_read(&adev->reset_domain->in_gpu_reset);
5874 }
5875 
5876 /**
5877  * amdgpu_device_halt() - bring hardware to some kind of halt state
5878  *
5879  * @adev: amdgpu_device pointer
5880  *
5881  * Bring hardware to some kind of halt state so that no one can touch it
5882  * any more. This helps to preserve the error context when an error
5883  * occurs. Compared to a simple hang, the system stays stable at least
5884  * for SSH access, so it should be trivial to inspect the hardware state
5885  * and see what is going on. Implemented as follows:
5886  *
5887  * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc.),
5888  *    clears all CPU mappings to device, disallows remappings through page faults
5889  * 2. amdgpu_irq_disable_all() disables all interrupts
5890  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5891  * 4. set adev->no_hw_access to avoid potential crashes after step 5
5892  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5893  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5894  *    flush any in flight DMA operations
5895  */
5896 void amdgpu_device_halt(struct amdgpu_device *adev)
5897 {
5898 	struct pci_dev *pdev = adev->pdev;
5899 	struct drm_device *ddev = adev_to_drm(adev);
5900 
5901 	drm_dev_unplug(ddev);
5902 
5903 	amdgpu_irq_disable_all(adev);
5904 
5905 	amdgpu_fence_driver_hw_fini(adev);
5906 
5907 	adev->no_hw_access = true;
5908 
5909 	amdgpu_device_unmap_mmio(adev);
5910 
5911 	pci_disable_device(pdev);
5912 	pci_wait_for_pending_transaction(pdev);
5913 }
5914 
5915 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5916 				u32 reg)
5917 {
5918 	unsigned long flags, address, data;
5919 	u32 r;
5920 
5921 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5922 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5923 
5924 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5925 	WREG32(address, reg * 4);
5926 	(void)RREG32(address);
5927 	r = RREG32(data);
5928 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5929 	return r;
5930 }
5931 
5932 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5933 				u32 reg, u32 v)
5934 {
5935 	unsigned long flags, address, data;
5936 
5937 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5938 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5939 
5940 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5941 	WREG32(address, reg * 4);
5942 	(void)RREG32(address);
5943 	WREG32(data, v);
5944 	(void)RREG32(data);
5945 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5946 }
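
/*
 * Usage sketch (illustrative only): the pair above provides indexed access to
 * PCIe port registers, so a read-modify-write looks like the following. The
 * register offset and bit mask are hypothetical.
 *
 *	u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg_offset);
 *	tmp |= some_enable_mask;
 *	amdgpu_device_pcie_port_wreg(adev, reg_offset, tmp);
 */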
5947 
5948 /**
5949  * amdgpu_device_switch_gang - switch to a new gang
5950  * @adev: amdgpu_device pointer
5951  * @gang: the gang to switch to
5952  *
5953  * Try to switch to a new gang.
5954  * Returns: NULL if we switched to the new gang or a reference to the current
5955  * Returns: NULL if we switched to the new gang, or a reference to the
5956  * still unsignaled current gang leader otherwise.
5957 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5958 					    struct dma_fence *gang)
5959 {
5960 	struct dma_fence *old = NULL;
5961 
5962 	do {
5963 		dma_fence_put(old);
5964 		rcu_read_lock();
5965 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
5966 		rcu_read_unlock();
5967 
5968 		if (old == gang)
5969 			break;
5970 
5971 		if (!dma_fence_is_signaled(old))
5972 			return old;
5973 
5974 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5975 			 old, gang) != old);
5976 
5977 	dma_fence_put(old);
5978 	return NULL;
5979 }
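
/*
 * Usage sketch (illustrative only): one simple, hypothetical way to consume
 * the return value is to retry until the previous gang leader has signaled.
 * Real callers may instead track the returned fence as a dependency.
 *
 *	struct dma_fence *old;
 *
 *	while ((old = amdgpu_device_switch_gang(adev, gang))) {
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *	}
 */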
5980 
5981 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5982 {
5983 	switch (adev->asic_type) {
5984 #ifdef CONFIG_DRM_AMDGPU_SI
5985 	case CHIP_HAINAN:
5986 #endif
5987 	case CHIP_TOPAZ:
5988 		/* chips with no display hardware */
5989 		return false;
5990 #ifdef CONFIG_DRM_AMDGPU_SI
5991 	case CHIP_TAHITI:
5992 	case CHIP_PITCAIRN:
5993 	case CHIP_VERDE:
5994 	case CHIP_OLAND:
5995 #endif
5996 #ifdef CONFIG_DRM_AMDGPU_CIK
5997 	case CHIP_BONAIRE:
5998 	case CHIP_HAWAII:
5999 	case CHIP_KAVERI:
6000 	case CHIP_KABINI:
6001 	case CHIP_MULLINS:
6002 #endif
6003 	case CHIP_TONGA:
6004 	case CHIP_FIJI:
6005 	case CHIP_POLARIS10:
6006 	case CHIP_POLARIS11:
6007 	case CHIP_POLARIS12:
6008 	case CHIP_VEGAM:
6009 	case CHIP_CARRIZO:
6010 	case CHIP_STONEY:
6011 		/* chips with display hardware */
6012 		return true;
6013 	default:
6014 		/* IP discovery */
6015 		if (!adev->ip_versions[DCE_HWIP][0] ||
6016 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6017 			return false;
6018 		return true;
6019 	}
6020 }
6021