1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/vgaarb.h>
45 #include <linux/vga_switcheroo.h>
46 #include <linux/efi.h>
47 #include "amdgpu.h"
48 #include "amdgpu_trace.h"
49 #include "amdgpu_i2c.h"
50 #include "atom.h"
51 #include "amdgpu_atombios.h"
52 #include "amdgpu_atomfirmware.h"
53 #include "amd_pcie.h"
54 #ifdef CONFIG_DRM_AMDGPU_SI
55 #include "si.h"
56 #endif
57 #ifdef CONFIG_DRM_AMDGPU_CIK
58 #include "cik.h"
59 #endif
60 #include "vi.h"
61 #include "soc15.h"
62 #include "nv.h"
63 #include "bif/bif_4_1_d.h"
64 #include <linux/firmware.h>
65 #include "amdgpu_vf_error.h"
66 
67 #include "amdgpu_amdkfd.h"
68 #include "amdgpu_pm.h"
69 
70 #include "amdgpu_xgmi.h"
71 #include "amdgpu_ras.h"
72 #include "amdgpu_pmu.h"
73 #include "amdgpu_fru_eeprom.h"
74 #include "amdgpu_reset.h"
75 
76 #include <linux/suspend.h>
77 #include <drm/task_barrier.h>
78 #include <linux/pm_runtime.h>
79 
80 #include <drm/drm_drv.h>
81 
82 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
83 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
84 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
85 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
86 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
87 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
88 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
89 
90 #define AMDGPU_RESUME_MS		2000
91 #define AMDGPU_MAX_RETRY_LIMIT		2
92 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
93 
94 const char *amdgpu_asic_name[] = {
95 	"TAHITI",
96 	"PITCAIRN",
97 	"VERDE",
98 	"OLAND",
99 	"HAINAN",
100 	"BONAIRE",
101 	"KAVERI",
102 	"KABINI",
103 	"HAWAII",
104 	"MULLINS",
105 	"TOPAZ",
106 	"TONGA",
107 	"FIJI",
108 	"CARRIZO",
109 	"STONEY",
110 	"POLARIS10",
111 	"POLARIS11",
112 	"POLARIS12",
113 	"VEGAM",
114 	"VEGA10",
115 	"VEGA12",
116 	"VEGA20",
117 	"RAVEN",
118 	"ARCTURUS",
119 	"RENOIR",
120 	"ALDEBARAN",
121 	"NAVI10",
122 	"CYAN_SKILLFISH",
123 	"NAVI14",
124 	"NAVI12",
125 	"SIENNA_CICHLID",
126 	"NAVY_FLOUNDER",
127 	"VANGOGH",
128 	"DIMGREY_CAVEFISH",
129 	"BEIGE_GOBY",
130 	"YELLOW_CARP",
131 	"IP DISCOVERY",
132 	"LAST",
133 };
134 
135 /**
136  * DOC: pcie_replay_count
137  *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and the NAKs received.
142  */
143 
144 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
145 		struct device_attribute *attr, char *buf)
146 {
147 	struct drm_device *ddev = dev_get_drvdata(dev);
148 	struct amdgpu_device *adev = drm_to_adev(ddev);
149 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
150 
151 	return sysfs_emit(buf, "%llu\n", cnt);
152 }
153 
154 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
155 		amdgpu_device_get_pcie_replay_count, NULL);
156 
157 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
158 
159 /**
160  * DOC: product_name
161  *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
166  * NOTE: This is only available for certain server cards
167  */
168 
169 static ssize_t amdgpu_device_get_product_name(struct device *dev,
170 		struct device_attribute *attr, char *buf)
171 {
172 	struct drm_device *ddev = dev_get_drvdata(dev);
173 	struct amdgpu_device *adev = drm_to_adev(ddev);
174 
175 	return sysfs_emit(buf, "%s\n", adev->product_name);
176 }
177 
178 static DEVICE_ATTR(product_name, S_IRUGO,
179 		amdgpu_device_get_product_name, NULL);
180 
181 /**
182  * DOC: product_number
183  *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
188  * NOTE: This is only available for certain server cards
189  */
190 
191 static ssize_t amdgpu_device_get_product_number(struct device *dev,
192 		struct device_attribute *attr, char *buf)
193 {
194 	struct drm_device *ddev = dev_get_drvdata(dev);
195 	struct amdgpu_device *adev = drm_to_adev(ddev);
196 
197 	return sysfs_emit(buf, "%s\n", adev->product_number);
198 }
199 
200 static DEVICE_ATTR(product_number, S_IRUGO,
201 		amdgpu_device_get_product_number, NULL);
202 
203 /**
204  * DOC: serial_number
205  *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
210  * NOTE: This is only available for certain server cards
211  */
212 
213 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
214 		struct device_attribute *attr, char *buf)
215 {
216 	struct drm_device *ddev = dev_get_drvdata(dev);
217 	struct amdgpu_device *adev = drm_to_adev(ddev);
218 
219 	return sysfs_emit(buf, "%s\n", adev->serial);
220 }
221 
222 static DEVICE_ATTR(serial_number, S_IRUGO,
223 		amdgpu_device_get_serial_number, NULL);
224 
225 /**
226  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
227  *
228  * @dev: drm_device pointer
229  *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
232  */
233 bool amdgpu_device_supports_px(struct drm_device *dev)
234 {
235 	struct amdgpu_device *adev = drm_to_adev(dev);
236 
237 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
238 		return true;
239 	return false;
240 }
241 
242 /**
243  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
244  *
245  * @dev: drm_device pointer
246  *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
249  */
250 bool amdgpu_device_supports_boco(struct drm_device *dev)
251 {
252 	struct amdgpu_device *adev = drm_to_adev(dev);
253 
254 	if (adev->has_pr3 ||
255 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
256 		return true;
257 	return false;
258 }
259 
260 /**
261  * amdgpu_device_supports_baco - Does the device support BACO
262  *
263  * @dev: drm_device pointer
264  *
 * Returns true if the device supports BACO,
 * otherwise returns false.
267  */
268 bool amdgpu_device_supports_baco(struct drm_device *dev)
269 {
270 	struct amdgpu_device *adev = drm_to_adev(dev);
271 
272 	return amdgpu_asic_supports_baco(adev);
273 }
274 
275 /**
276  * amdgpu_device_supports_smart_shift - Is the device dGPU with
277  * smart shift support
278  *
279  * @dev: drm_device pointer
280  *
281  * Returns true if the device is a dGPU with Smart Shift support,
282  * otherwise returns false.
283  */
284 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
285 {
286 	return (amdgpu_device_supports_boco(dev) &&
287 		amdgpu_acpi_is_power_shift_control_supported());
288 }
289 
290 /*
291  * VRAM access helper functions
292  */
293 
294 /**
295  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
296  *
297  * @adev: amdgpu_device pointer
298  * @pos: offset of the buffer in vram
299  * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
301  * @write: true - write to vram, otherwise - read from vram
302  */
303 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
304 			     void *buf, size_t size, bool write)
305 {
306 	unsigned long flags;
307 	uint32_t hi = ~0, tmp = 0;
308 	uint32_t *data = buf;
309 	uint64_t last;
310 	int idx;
311 
312 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
313 		return;
314 
315 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
316 
317 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
318 	for (last = pos + size; pos < last; pos += 4) {
319 		tmp = pos >> 31;
320 
321 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
322 		if (tmp != hi) {
323 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
324 			hi = tmp;
325 		}
326 		if (write)
327 			WREG32_NO_KIQ(mmMM_DATA, *data++);
328 		else
329 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
330 	}
331 
332 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
333 	drm_dev_exit(idx);
334 }
335 
336 /**
 * amdgpu_device_aper_access - access vram by the vram aperture
338  *
339  * @adev: amdgpu_device pointer
340  * @pos: offset of the buffer in vram
341  * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes that were transferred.
346  */
347 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
348 				 void *buf, size_t size, bool write)
349 {
350 #ifdef CONFIG_64BIT
351 	void __iomem *addr;
352 	size_t count = 0;
353 	uint64_t last;
354 
355 	if (!adev->mman.aper_base_kaddr)
356 		return 0;
357 
358 	last = min(pos + size, adev->gmc.visible_vram_size);
359 	if (last > pos) {
360 		addr = adev->mman.aper_base_kaddr + pos;
361 		count = last - pos;
362 
363 		if (write) {
364 			memcpy_toio(addr, buf, count);
365 			mb();
366 			amdgpu_device_flush_hdp(adev, NULL);
367 		} else {
368 			amdgpu_device_invalidate_hdp(adev, NULL);
369 			mb();
370 			memcpy_fromio(buf, addr, count);
371 		}
372 
373 	}
374 
375 	return count;
376 #else
377 	return 0;
378 #endif
379 }
380 
381 /**
382  * amdgpu_device_vram_access - read/write a buffer in vram
383  *
384  * @adev: amdgpu_device pointer
385  * @pos: offset of the buffer in vram
386  * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
388  * @write: true - write to vram, otherwise - read from vram
389  */
390 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
391 			       void *buf, size_t size, bool write)
392 {
393 	size_t count;
394 
	/* try using the vram aperture to access vram first */
396 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
397 	size -= count;
398 	if (size) {
		/* use MM access for the rest of vram */
400 		pos += count;
401 		buf += count;
402 		amdgpu_device_mm_access(adev, pos, buf, size, write);
403 	}
404 }
405 
406 /*
407  * register access helper functions.
408  */
409 
410 /* Check if hw access should be skipped because of hotplug or device error */
411 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
412 {
413 	if (adev->no_hw_access)
414 		return true;
415 
416 #ifdef CONFIG_LOCKDEP
417 	/*
418 	 * This is a bit complicated to understand, so worth a comment. What we assert
419 	 * here is that the GPU reset is not running on another thread in parallel.
420 	 *
	 * For this we trylock the read side of the reset semaphore; if that succeeds
	 * we know that the reset is not running in parallel.
423 	 *
424 	 * If the trylock fails we assert that we are either already holding the read
425 	 * side of the lock or are the reset thread itself and hold the write side of
426 	 * the lock.
427 	 */
428 	if (in_task()) {
429 		if (down_read_trylock(&adev->reset_domain->sem))
430 			up_read(&adev->reset_domain->sem);
431 		else
432 			lockdep_assert_held(&adev->reset_domain->sem);
433 	}
434 #endif
435 	return false;
436 }
437 
438 /**
439  * amdgpu_device_rreg - read a memory mapped IO or indirect register
440  *
441  * @adev: amdgpu_device pointer
442  * @reg: dword aligned register offset
443  * @acc_flags: access flags which require special behavior
444  *
445  * Returns the 32 bit value from the offset specified.
446  */
447 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
448 			    uint32_t reg, uint32_t acc_flags)
449 {
450 	uint32_t ret;
451 
452 	if (amdgpu_device_skip_hw_access(adev))
453 		return 0;
454 
455 	if ((reg * 4) < adev->rmmio_size) {
456 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
457 		    amdgpu_sriov_runtime(adev) &&
458 		    down_read_trylock(&adev->reset_domain->sem)) {
459 			ret = amdgpu_kiq_rreg(adev, reg);
460 			up_read(&adev->reset_domain->sem);
461 		} else {
462 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
463 		}
464 	} else {
465 		ret = adev->pcie_rreg(adev, reg * 4);
466 	}
467 
468 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
469 
470 	return ret;
471 }
472 
473 /*
474  * MMIO register read with bytes helper functions
475  * @offset:bytes offset from MMIO start
476  *
477 */
478 
479 /**
480  * amdgpu_mm_rreg8 - read a memory mapped IO register
481  *
482  * @adev: amdgpu_device pointer
483  * @offset: byte aligned register offset
484  *
485  * Returns the 8 bit value from the offset specified.
486  */
487 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
488 {
489 	if (amdgpu_device_skip_hw_access(adev))
490 		return 0;
491 
492 	if (offset < adev->rmmio_size)
493 		return (readb(adev->rmmio + offset));
494 	BUG();
495 }
496 
497 /*
498  * MMIO register write with bytes helper functions
499  * @offset:bytes offset from MMIO start
500  * @value: the value want to be written to the register
501  *
502 */
503 /**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
505  *
506  * @adev: amdgpu_device pointer
507  * @offset: byte aligned register offset
508  * @value: 8 bit value to write
509  *
510  * Writes the value specified to the offset specified.
511  */
512 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
513 {
514 	if (amdgpu_device_skip_hw_access(adev))
515 		return;
516 
517 	if (offset < adev->rmmio_size)
518 		writeb(value, adev->rmmio + offset);
519 	else
520 		BUG();
521 }
522 
523 /**
524  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
525  *
526  * @adev: amdgpu_device pointer
527  * @reg: dword aligned register offset
528  * @v: 32 bit value to write to the register
529  * @acc_flags: access flags which require special behavior
530  *
531  * Writes the value specified to the offset specified.
532  */
533 void amdgpu_device_wreg(struct amdgpu_device *adev,
534 			uint32_t reg, uint32_t v,
535 			uint32_t acc_flags)
536 {
537 	if (amdgpu_device_skip_hw_access(adev))
538 		return;
539 
540 	if ((reg * 4) < adev->rmmio_size) {
541 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
542 		    amdgpu_sriov_runtime(adev) &&
543 		    down_read_trylock(&adev->reset_domain->sem)) {
544 			amdgpu_kiq_wreg(adev, reg, v);
545 			up_read(&adev->reset_domain->sem);
546 		} else {
547 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
548 		}
549 	} else {
550 		adev->pcie_wreg(adev, reg * 4, v);
551 	}
552 
553 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
554 }
555 
556 /**
557  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
558  *
559  * @adev: amdgpu_device pointer
560  * @reg: mmio/rlc register
561  * @v: value to write
562  *
563  * this function is invoked only for the debugfs register access
564  */
565 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
566 			     uint32_t reg, uint32_t v)
567 {
568 	if (amdgpu_device_skip_hw_access(adev))
569 		return;
570 
571 	if (amdgpu_sriov_fullaccess(adev) &&
572 	    adev->gfx.rlc.funcs &&
573 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
574 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
575 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
576 	} else if ((reg * 4) >= adev->rmmio_size) {
577 		adev->pcie_wreg(adev, reg * 4, v);
578 	} else {
579 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
580 	}
581 }
582 
583 /**
584  * amdgpu_mm_rdoorbell - read a doorbell dword
585  *
586  * @adev: amdgpu_device pointer
587  * @index: doorbell index
588  *
589  * Returns the value in the doorbell aperture at the
590  * requested doorbell index (CIK).
591  */
592 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
593 {
594 	if (amdgpu_device_skip_hw_access(adev))
595 		return 0;
596 
597 	if (index < adev->doorbell.num_doorbells) {
598 		return readl(adev->doorbell.ptr + index);
599 	} else {
600 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
601 		return 0;
602 	}
603 }
604 
605 /**
606  * amdgpu_mm_wdoorbell - write a doorbell dword
607  *
608  * @adev: amdgpu_device pointer
609  * @index: doorbell index
610  * @v: value to write
611  *
612  * Writes @v to the doorbell aperture at the
613  * requested doorbell index (CIK).
614  */
615 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
616 {
617 	if (amdgpu_device_skip_hw_access(adev))
618 		return;
619 
620 	if (index < adev->doorbell.num_doorbells) {
621 		writel(v, adev->doorbell.ptr + index);
622 	} else {
623 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
624 	}
625 }
626 
627 /**
628  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
629  *
630  * @adev: amdgpu_device pointer
631  * @index: doorbell index
632  *
633  * Returns the value in the doorbell aperture at the
634  * requested doorbell index (VEGA10+).
635  */
636 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
637 {
638 	if (amdgpu_device_skip_hw_access(adev))
639 		return 0;
640 
641 	if (index < adev->doorbell.num_doorbells) {
642 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
643 	} else {
644 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
645 		return 0;
646 	}
647 }
648 
649 /**
650  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
651  *
652  * @adev: amdgpu_device pointer
653  * @index: doorbell index
654  * @v: value to write
655  *
656  * Writes @v to the doorbell aperture at the
657  * requested doorbell index (VEGA10+).
658  */
659 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
660 {
661 	if (amdgpu_device_skip_hw_access(adev))
662 		return;
663 
664 	if (index < adev->doorbell.num_doorbells) {
665 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
666 	} else {
667 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
668 	}
669 }
670 
671 /**
672  * amdgpu_device_indirect_rreg - read an indirect register
673  *
674  * @adev: amdgpu_device pointer
675  * @pcie_index: mmio register offset
676  * @pcie_data: mmio register offset
677  * @reg_addr: indirect register address to read from
678  *
679  * Returns the value of indirect register @reg_addr
680  */
681 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
682 				u32 pcie_index, u32 pcie_data,
683 				u32 reg_addr)
684 {
685 	unsigned long flags;
686 	u32 r;
687 	void __iomem *pcie_index_offset;
688 	void __iomem *pcie_data_offset;
689 
690 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
691 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
692 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
693 
694 	writel(reg_addr, pcie_index_offset);
695 	readl(pcie_index_offset);
696 	r = readl(pcie_data_offset);
697 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
698 
699 	return r;
700 }
701 
702 /**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
704  *
705  * @adev: amdgpu_device pointer
706  * @pcie_index: mmio register offset
707  * @pcie_data: mmio register offset
708  * @reg_addr: indirect register address to read from
709  *
710  * Returns the value of indirect register @reg_addr
711  */
712 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
713 				  u32 pcie_index, u32 pcie_data,
714 				  u32 reg_addr)
715 {
716 	unsigned long flags;
717 	u64 r;
718 	void __iomem *pcie_index_offset;
719 	void __iomem *pcie_data_offset;
720 
721 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
722 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
723 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
724 
725 	/* read low 32 bits */
726 	writel(reg_addr, pcie_index_offset);
727 	readl(pcie_index_offset);
728 	r = readl(pcie_data_offset);
729 	/* read high 32 bits */
730 	writel(reg_addr + 4, pcie_index_offset);
731 	readl(pcie_index_offset);
732 	r |= ((u64)readl(pcie_data_offset) << 32);
733 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
734 
735 	return r;
736 }
737 
738 /**
 * amdgpu_device_indirect_wreg - write an indirect register
740  *
741  * @adev: amdgpu_device pointer
742  * @pcie_index: mmio register offset
743  * @pcie_data: mmio register offset
744  * @reg_addr: indirect register offset
745  * @reg_data: indirect register data
746  *
747  */
748 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
749 				 u32 pcie_index, u32 pcie_data,
750 				 u32 reg_addr, u32 reg_data)
751 {
752 	unsigned long flags;
753 	void __iomem *pcie_index_offset;
754 	void __iomem *pcie_data_offset;
755 
756 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
757 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
758 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
759 
760 	writel(reg_addr, pcie_index_offset);
761 	readl(pcie_index_offset);
762 	writel(reg_data, pcie_data_offset);
763 	readl(pcie_data_offset);
764 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
765 }
766 
767 /**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
769  *
770  * @adev: amdgpu_device pointer
771  * @pcie_index: mmio register offset
772  * @pcie_data: mmio register offset
773  * @reg_addr: indirect register offset
774  * @reg_data: indirect register data
775  *
776  */
777 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
778 				   u32 pcie_index, u32 pcie_data,
779 				   u32 reg_addr, u64 reg_data)
780 {
781 	unsigned long flags;
782 	void __iomem *pcie_index_offset;
783 	void __iomem *pcie_data_offset;
784 
785 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
786 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
787 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
788 
789 	/* write low 32 bits */
790 	writel(reg_addr, pcie_index_offset);
791 	readl(pcie_index_offset);
792 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
793 	readl(pcie_data_offset);
794 	/* write high 32 bits */
795 	writel(reg_addr + 4, pcie_index_offset);
796 	readl(pcie_index_offset);
797 	writel((u32)(reg_data >> 32), pcie_data_offset);
798 	readl(pcie_data_offset);
799 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
800 }
801 
802 /**
803  * amdgpu_invalid_rreg - dummy reg read function
804  *
805  * @adev: amdgpu_device pointer
806  * @reg: offset of register
807  *
808  * Dummy register read function.  Used for register blocks
809  * that certain asics don't have (all asics).
810  * Returns the value in the register.
811  */
812 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
813 {
814 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
815 	BUG();
816 	return 0;
817 }
818 
819 /**
820  * amdgpu_invalid_wreg - dummy reg write function
821  *
822  * @adev: amdgpu_device pointer
823  * @reg: offset of register
824  * @v: value to write to the register
825  *
 * Dummy register write function.  Used for register blocks
827  * that certain asics don't have (all asics).
828  */
829 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
830 {
831 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
832 		  reg, v);
833 	BUG();
834 }
835 
836 /**
837  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
838  *
839  * @adev: amdgpu_device pointer
840  * @reg: offset of register
841  *
842  * Dummy register read function.  Used for register blocks
843  * that certain asics don't have (all asics).
844  * Returns the value in the register.
845  */
846 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
847 {
848 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
849 	BUG();
850 	return 0;
851 }
852 
853 /**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
855  *
856  * @adev: amdgpu_device pointer
857  * @reg: offset of register
858  * @v: value to write to the register
859  *
 * Dummy register write function.  Used for register blocks
861  * that certain asics don't have (all asics).
862  */
863 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
864 {
865 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
866 		  reg, v);
867 	BUG();
868 }
869 
870 /**
871  * amdgpu_block_invalid_rreg - dummy reg read function
872  *
873  * @adev: amdgpu_device pointer
874  * @block: offset of instance
875  * @reg: offset of register
876  *
877  * Dummy register read function.  Used for register blocks
878  * that certain asics don't have (all asics).
879  * Returns the value in the register.
880  */
881 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
882 					  uint32_t block, uint32_t reg)
883 {
884 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
885 		  reg, block);
886 	BUG();
887 	return 0;
888 }
889 
890 /**
891  * amdgpu_block_invalid_wreg - dummy reg write function
892  *
893  * @adev: amdgpu_device pointer
894  * @block: offset of instance
895  * @reg: offset of register
896  * @v: value to write to the register
897  *
 * Dummy register write function.  Used for register blocks
899  * that certain asics don't have (all asics).
900  */
901 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
902 				      uint32_t block,
903 				      uint32_t reg, uint32_t v)
904 {
905 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
906 		  reg, block, v);
907 	BUG();
908 }
909 
910 /**
911  * amdgpu_device_asic_init - Wrapper for atom asic_init
912  *
913  * @adev: amdgpu_device pointer
914  *
915  * Does any asic specific work and then calls atom asic init.
916  */
917 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
918 {
919 	amdgpu_asic_pre_asic_init(adev);
920 
921 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
922 		return amdgpu_atomfirmware_asic_init(adev, true);
923 	else
924 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
925 }
926 
927 /**
928  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
929  *
930  * @adev: amdgpu_device pointer
931  *
932  * Allocates a scratch page of VRAM for use by various things in the
933  * driver.
934  */
935 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
936 {
937 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
938 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
939 				       &adev->vram_scratch.robj,
940 				       &adev->vram_scratch.gpu_addr,
941 				       (void **)&adev->vram_scratch.ptr);
942 }
943 
944 /**
945  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
946  *
947  * @adev: amdgpu_device pointer
948  *
949  * Frees the VRAM scratch page.
950  */
951 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
952 {
953 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
954 }
955 
956 /**
957  * amdgpu_device_program_register_sequence - program an array of registers.
958  *
959  * @adev: amdgpu_device pointer
960  * @registers: pointer to the register array
961  * @array_size: size of the register array
962  *
 * Programs an array of registers with AND and OR masks.
964  * This is a helper for setting golden registers.
965  */
966 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
967 					     const u32 *registers,
968 					     const u32 array_size)
969 {
970 	u32 tmp, reg, and_mask, or_mask;
971 	int i;
972 
973 	if (array_size % 3)
974 		return;
975 
	for (i = 0; i < array_size; i += 3) {
977 		reg = registers[i + 0];
978 		and_mask = registers[i + 1];
979 		or_mask = registers[i + 2];
980 
981 		if (and_mask == 0xffffffff) {
982 			tmp = or_mask;
983 		} else {
984 			tmp = RREG32(reg);
985 			tmp &= ~and_mask;
986 			if (adev->family >= AMDGPU_FAMILY_AI)
987 				tmp |= (or_mask & and_mask);
988 			else
989 				tmp |= or_mask;
990 		}
991 		WREG32(reg, tmp);
992 	}
993 }
994 
995 /**
996  * amdgpu_device_pci_config_reset - reset the GPU
997  *
998  * @adev: amdgpu_device pointer
999  *
1000  * Resets the GPU using the pci config reset sequence.
1001  * Only applicable to asics prior to vega10.
1002  */
1003 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1004 {
1005 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1006 }
1007 
1008 /**
1009  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1010  *
1011  * @adev: amdgpu_device pointer
1012  *
1013  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1014  */
1015 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1016 {
1017 	return pci_reset_function(adev->pdev);
1018 }
1019 
1020 /*
 * GPU doorbell aperture helper functions.
1022  */
1023 /**
1024  * amdgpu_device_doorbell_init - Init doorbell driver information.
1025  *
1026  * @adev: amdgpu_device pointer
1027  *
1028  * Init doorbell driver information (CIK)
1029  * Returns 0 on success, error on failure.
1030  */
1031 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1032 {
1033 
1034 	/* No doorbell on SI hardware generation */
1035 	if (adev->asic_type < CHIP_BONAIRE) {
1036 		adev->doorbell.base = 0;
1037 		adev->doorbell.size = 0;
1038 		adev->doorbell.num_doorbells = 0;
1039 		adev->doorbell.ptr = NULL;
1040 		return 0;
1041 	}
1042 
1043 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1044 		return -EINVAL;
1045 
1046 	amdgpu_asic_init_doorbell_index(adev);
1047 
1048 	/* doorbell bar mapping */
1049 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1050 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1051 
1052 	if (adev->enable_mes) {
1053 		adev->doorbell.num_doorbells =
1054 			adev->doorbell.size / sizeof(u32);
1055 	} else {
1056 		adev->doorbell.num_doorbells =
1057 			min_t(u32, adev->doorbell.size / sizeof(u32),
1058 			      adev->doorbell_index.max_assignment+1);
1059 		if (adev->doorbell.num_doorbells == 0)
1060 			return -EINVAL;
1061 
1062 		/* For Vega, reserve and map two pages on doorbell BAR since SDMA
1063 		 * paging queue doorbell use the second page. The
1064 		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1065 		 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells should be incremented by one page (0x400 in dwords).
1067 		 */
1068 		if (adev->asic_type >= CHIP_VEGA10)
1069 			adev->doorbell.num_doorbells += 0x400;
1070 	}
1071 
1072 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
1073 				     adev->doorbell.num_doorbells *
1074 				     sizeof(u32));
1075 	if (adev->doorbell.ptr == NULL)
1076 		return -ENOMEM;
1077 
1078 	return 0;
1079 }
1080 
1081 /**
1082  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1083  *
1084  * @adev: amdgpu_device pointer
1085  *
1086  * Tear down doorbell driver information (CIK)
1087  */
1088 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1089 {
1090 	iounmap(adev->doorbell.ptr);
1091 	adev->doorbell.ptr = NULL;
1092 }
1093 
1094 
1095 
1096 /*
1097  * amdgpu_device_wb_*()
1098  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1100  */
1101 
1102 /**
1103  * amdgpu_device_wb_fini - Disable Writeback and free memory
1104  *
1105  * @adev: amdgpu_device pointer
1106  *
1107  * Disables Writeback and frees the Writeback memory (all asics).
1108  * Used at driver shutdown.
1109  */
1110 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1111 {
1112 	if (adev->wb.wb_obj) {
1113 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1114 				      &adev->wb.gpu_addr,
1115 				      (void **)&adev->wb.wb);
1116 		adev->wb.wb_obj = NULL;
1117 	}
1118 }
1119 
1120 /**
1121  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1122  *
1123  * @adev: amdgpu_device pointer
1124  *
1125  * Initializes writeback and allocates writeback memory (all asics).
1126  * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
1128  */
1129 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1130 {
1131 	int r;
1132 
1133 	if (adev->wb.wb_obj == NULL) {
1134 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1135 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1136 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1137 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1138 					    (void **)&adev->wb.wb);
1139 		if (r) {
1140 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1141 			return r;
1142 		}
1143 
1144 		adev->wb.num_wb = AMDGPU_MAX_WB;
1145 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1146 
1147 		/* clear wb memory */
1148 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1149 	}
1150 
1151 	return 0;
1152 }
1153 
1154 /**
1155  * amdgpu_device_wb_get - Allocate a wb entry
1156  *
1157  * @adev: amdgpu_device pointer
1158  * @wb: wb index
1159  *
1160  * Allocate a wb slot for use by the driver (all asics).
1161  * Returns 0 on success or -EINVAL on failure.
1162  */
1163 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1164 {
1165 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1166 
1167 	if (offset < adev->wb.num_wb) {
1168 		__set_bit(offset, adev->wb.used);
1169 		*wb = offset << 3; /* convert to dw offset */
1170 		return 0;
1171 	} else {
1172 		return -EINVAL;
1173 	}
1174 }
1175 
1176 /**
1177  * amdgpu_device_wb_free - Free a wb entry
1178  *
1179  * @adev: amdgpu_device pointer
1180  * @wb: wb index
1181  *
1182  * Free a wb slot allocated for use by the driver (all asics)
1183  */
1184 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1185 {
1186 	wb >>= 3;
1187 	if (wb < adev->wb.num_wb)
1188 		__clear_bit(wb, adev->wb.used);
1189 }
1190 
1191 /**
1192  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1193  *
1194  * @adev: amdgpu_device pointer
1195  *
1196  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
1199  */
1200 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1201 {
1202 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1203 	struct pci_bus *root;
1204 	struct resource *res;
1205 	unsigned i;
1206 	u16 cmd;
1207 	int r;
1208 
1209 	/* Bypass for VF */
1210 	if (amdgpu_sriov_vf(adev))
1211 		return 0;
1212 
1213 	/* skip if the bios has already enabled large BAR */
1214 	if (adev->gmc.real_vram_size &&
1215 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1216 		return 0;
1217 
1218 	/* Check if the root BUS has 64bit memory resources */
1219 	root = adev->pdev->bus;
1220 	while (root->parent)
1221 		root = root->parent;
1222 
1223 	pci_bus_for_each_resource(root, res, i) {
1224 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1225 		    res->start > 0x100000000ull)
1226 			break;
1227 	}
1228 
1229 	/* Trying to resize is pointless without a root hub window above 4GB */
1230 	if (!res)
1231 		return 0;
1232 
1233 	/* Limit the BAR size to what is available */
1234 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1235 			rbar_size);
1236 
1237 	/* Disable memory decoding while we change the BAR addresses and size */
1238 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1239 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1240 			      cmd & ~PCI_COMMAND_MEMORY);
1241 
1242 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1243 	amdgpu_device_doorbell_fini(adev);
1244 	if (adev->asic_type >= CHIP_BONAIRE)
1245 		pci_release_resource(adev->pdev, 2);
1246 
1247 	pci_release_resource(adev->pdev, 0);
1248 
1249 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1250 	if (r == -ENOSPC)
1251 		DRM_INFO("Not enough PCI address space for a large BAR.");
1252 	else if (r && r != -ENOTSUPP)
1253 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1254 
1255 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1256 
1257 	/* When the doorbell or fb BAR isn't available we have no chance of
1258 	 * using the device.
1259 	 */
1260 	r = amdgpu_device_doorbell_init(adev);
1261 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1262 		return -ENODEV;
1263 
1264 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1265 
1266 	return 0;
1267 }
1268 
1269 /*
 * GPU helper functions.
1271  */
1272 /**
1273  * amdgpu_device_need_post - check if the hw need post or not
1274  *
1275  * @adev: amdgpu_device pointer
1276  *
 * Check if the asic needs to be posted, either because it has not been
 * initialized yet at driver startup (all asics) or because a hw reset
 * was performed.
 * Returns true if posting is needed, false if not.
1280  */
1281 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1282 {
1283 	uint32_t reg;
1284 
1285 	if (amdgpu_sriov_vf(adev))
1286 		return false;
1287 
1288 	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs.  SMC firmware versions of 22.15 and above do
		 * not have this flaw, so force vPost only for SMC versions below 22.15.
		 */
1294 		if (adev->asic_type == CHIP_FIJI) {
1295 			int err;
1296 			uint32_t fw_ver;
1297 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
1299 			if (err)
1300 				return true;
1301 
1302 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1303 			if (fw_ver < 0x00160e00)
1304 				return true;
1305 		}
1306 	}
1307 
1308 	/* Don't post if we need to reset whole hive on init */
1309 	if (adev->gmc.xgmi.pending_reset)
1310 		return false;
1311 
1312 	if (adev->has_hw_reset) {
1313 		adev->has_hw_reset = false;
1314 		return true;
1315 	}
1316 
1317 	/* bios scratch used on CIK+ */
1318 	if (adev->asic_type >= CHIP_BONAIRE)
1319 		return amdgpu_atombios_scratch_need_asic_init(adev);
1320 
1321 	/* check MEM_SIZE for older asics */
1322 	reg = amdgpu_asic_get_config_memsize(adev);
1323 
1324 	if ((reg != 0) && (reg != 0xffffffff))
1325 		return false;
1326 
1327 	return true;
1328 }
1329 
1330 /**
1331  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1332  *
1333  * @adev: amdgpu_device pointer
1334  *
1335  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1336  * be set for this device.
1337  *
1338  * Returns true if it should be used or false if not.
1339  */
1340 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1341 {
1342 	switch (amdgpu_aspm) {
1343 	case -1:
1344 		break;
1345 	case 0:
1346 		return false;
1347 	case 1:
1348 		return true;
1349 	default:
1350 		return false;
1351 	}
1352 	return pcie_aspm_enabled(adev->pdev);
1353 }
1354 
1355 /* if we get transitioned to only one device, take VGA back */
1356 /**
1357  * amdgpu_device_vga_set_decode - enable/disable vga decode
1358  *
1359  * @pdev: PCI device pointer
1360  * @state: enable/disable vga decode
1361  *
1362  * Enable/disable vga decode (all asics).
1363  * Returns VGA resource flags.
1364  */
1365 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1366 		bool state)
1367 {
1368 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1369 	amdgpu_asic_set_vga_state(adev, state);
1370 	if (state)
1371 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1372 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1373 	else
1374 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1375 }
1376 
1377 /**
1378  * amdgpu_device_check_block_size - validate the vm block size
1379  *
1380  * @adev: amdgpu_device pointer
1381  *
1382  * Validates the vm block size specified via module parameter.
1383  * The vm block size defines number of bits in page table versus page directory,
1384  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1385  * page table and the remaining bits are in the page directory.
1386  */
1387 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1388 {
1389 	/* defines number of bits in page table versus page directory,
1390 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1391 	 * page table and the remaining bits are in the page directory */
1392 	if (amdgpu_vm_block_size == -1)
1393 		return;
1394 
1395 	if (amdgpu_vm_block_size < 9) {
1396 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1397 			 amdgpu_vm_block_size);
1398 		amdgpu_vm_block_size = -1;
1399 	}
1400 }
1401 
1402 /**
1403  * amdgpu_device_check_vm_size - validate the vm size
1404  *
1405  * @adev: amdgpu_device pointer
1406  *
1407  * Validates the vm size in GB specified via module parameter.
1408  * The VM size is the size of the GPU virtual memory space in GB.
1409  */
1410 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1411 {
1412 	/* no need to check the default value */
1413 	if (amdgpu_vm_size == -1)
1414 		return;
1415 
1416 	if (amdgpu_vm_size < 1) {
1417 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1418 			 amdgpu_vm_size);
1419 		amdgpu_vm_size = -1;
1420 	}
1421 }
1422 
1423 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1424 {
1425 	struct sysinfo si;
1426 	bool is_os_64 = (sizeof(void *) == 8);
1427 	uint64_t total_memory;
1428 	uint64_t dram_size_seven_GB = 0x1B8000000;
1429 	uint64_t dram_size_three_GB = 0xB8000000;
1430 
1431 	if (amdgpu_smu_memory_pool_size == 0)
1432 		return;
1433 
1434 	if (!is_os_64) {
1435 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1436 		goto def_value;
1437 	}
1438 	si_meminfo(&si);
1439 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1440 
1441 	if ((amdgpu_smu_memory_pool_size == 1) ||
1442 		(amdgpu_smu_memory_pool_size == 2)) {
1443 		if (total_memory < dram_size_three_GB)
1444 			goto def_value1;
1445 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1446 		(amdgpu_smu_memory_pool_size == 8)) {
1447 		if (total_memory < dram_size_seven_GB)
1448 			goto def_value1;
1449 	} else {
1450 		DRM_WARN("Smu memory pool size not supported\n");
1451 		goto def_value;
1452 	}
1453 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1454 
1455 	return;
1456 
1457 def_value1:
1458 	DRM_WARN("No enough system memory\n");
1459 def_value:
1460 	adev->pm.smu_prv_buffer_size = 0;
1461 }
1462 
1463 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1464 {
1465 	if (!(adev->flags & AMD_IS_APU) ||
1466 	    adev->asic_type < CHIP_RAVEN)
1467 		return 0;
1468 
1469 	switch (adev->asic_type) {
1470 	case CHIP_RAVEN:
1471 		if (adev->pdev->device == 0x15dd)
1472 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1473 		if (adev->pdev->device == 0x15d8)
1474 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1475 		break;
1476 	case CHIP_RENOIR:
1477 		if ((adev->pdev->device == 0x1636) ||
1478 		    (adev->pdev->device == 0x164c))
1479 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1480 		else
1481 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1482 		break;
1483 	case CHIP_VANGOGH:
1484 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1485 		break;
1486 	case CHIP_YELLOW_CARP:
1487 		break;
1488 	case CHIP_CYAN_SKILLFISH:
1489 		if ((adev->pdev->device == 0x13FE) ||
1490 		    (adev->pdev->device == 0x143F))
1491 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1492 		break;
1493 	default:
1494 		break;
1495 	}
1496 
1497 	return 0;
1498 }
1499 
1500 /**
1501  * amdgpu_device_check_arguments - validate module params
1502  *
1503  * @adev: amdgpu_device pointer
1504  *
1505  * Validates certain module parameters and updates
1506  * the associated values used by the driver (all asics).
1507  */
1508 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1509 {
1510 	if (amdgpu_sched_jobs < 4) {
1511 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1512 			 amdgpu_sched_jobs);
1513 		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1515 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1516 			 amdgpu_sched_jobs);
1517 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1518 	}
1519 
1520 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1521 		/* gart size must be greater or equal to 32M */
1522 		dev_warn(adev->dev, "gart size (%d) too small\n",
1523 			 amdgpu_gart_size);
1524 		amdgpu_gart_size = -1;
1525 	}
1526 
1527 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1528 		/* gtt size must be greater or equal to 32M */
1529 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1530 				 amdgpu_gtt_size);
1531 		amdgpu_gtt_size = -1;
1532 	}
1533 
1534 	/* valid range is between 4 and 9 inclusive */
1535 	if (amdgpu_vm_fragment_size != -1 &&
1536 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1537 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1538 		amdgpu_vm_fragment_size = -1;
1539 	}
1540 
1541 	if (amdgpu_sched_hw_submission < 2) {
1542 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1543 			 amdgpu_sched_hw_submission);
1544 		amdgpu_sched_hw_submission = 2;
1545 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1546 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1547 			 amdgpu_sched_hw_submission);
1548 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1549 	}
1550 
1551 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1552 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1553 		amdgpu_reset_method = -1;
1554 	}
1555 
1556 	amdgpu_device_check_smu_prv_buffer_size(adev);
1557 
1558 	amdgpu_device_check_vm_size(adev);
1559 
1560 	amdgpu_device_check_block_size(adev);
1561 
1562 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1563 
1564 	return 0;
1565 }
1566 
1567 /**
1568  * amdgpu_switcheroo_set_state - set switcheroo state
1569  *
1570  * @pdev: pci dev pointer
1571  * @state: vga_switcheroo state
1572  *
1573  * Callback for the switcheroo driver.  Suspends or resumes
1574  * the asics before or after it is powered up using ACPI methods.
1575  */
1576 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1577 					enum vga_switcheroo_state state)
1578 {
1579 	struct drm_device *dev = pci_get_drvdata(pdev);
1580 	int r;
1581 
1582 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1583 		return;
1584 
1585 	if (state == VGA_SWITCHEROO_ON) {
1586 		pr_info("switched on\n");
1587 		/* don't suspend or resume card normally */
1588 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1589 
1590 		pci_set_power_state(pdev, PCI_D0);
1591 		amdgpu_device_load_pci_state(pdev);
1592 		r = pci_enable_device(pdev);
1593 		if (r)
1594 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1595 		amdgpu_device_resume(dev, true);
1596 
1597 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1598 	} else {
1599 		pr_info("switched off\n");
1600 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1601 		amdgpu_device_suspend(dev, true);
1602 		amdgpu_device_cache_pci_state(pdev);
1603 		/* Shut down the device */
1604 		pci_disable_device(pdev);
1605 		pci_set_power_state(pdev, PCI_D3cold);
1606 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1607 	}
1608 }
1609 
1610 /**
1611  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1612  *
1613  * @pdev: pci dev pointer
1614  *
 * Callback for the switcheroo driver.  Checks whether the switcheroo
 * state can be changed.
1617  * Returns true if the state can be changed, false if not.
1618  */
1619 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1620 {
1621 	struct drm_device *dev = pci_get_drvdata(pdev);
1622 
1623 	/*
1624 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1625 	* locking inversion with the driver load path. And the access here is
1626 	* completely racy anyway. So don't bother with locking for now.
1627 	*/
1628 	return atomic_read(&dev->open_count) == 0;
1629 }
1630 
1631 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1632 	.set_gpu_state = amdgpu_switcheroo_set_state,
1633 	.reprobe = NULL,
1634 	.can_switch = amdgpu_switcheroo_can_switch,
1635 };
1636 
1637 /**
1638  * amdgpu_device_ip_set_clockgating_state - set the CG state
1639  *
1640  * @dev: amdgpu_device pointer
1641  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1642  * @state: clockgating state (gate or ungate)
1643  *
1644  * Sets the requested clockgating state for all instances of
1645  * the hardware IP specified.
1646  * Returns the error code from the last instance.
1647  */
1648 int amdgpu_device_ip_set_clockgating_state(void *dev,
1649 					   enum amd_ip_block_type block_type,
1650 					   enum amd_clockgating_state state)
1651 {
1652 	struct amdgpu_device *adev = dev;
1653 	int i, r = 0;
1654 
1655 	for (i = 0; i < adev->num_ip_blocks; i++) {
1656 		if (!adev->ip_blocks[i].status.valid)
1657 			continue;
1658 		if (adev->ip_blocks[i].version->type != block_type)
1659 			continue;
1660 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1661 			continue;
1662 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1663 			(void *)adev, state);
1664 		if (r)
1665 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1666 				  adev->ip_blocks[i].version->funcs->name, r);
1667 	}
1668 	return r;
1669 }
1670 
1671 /**
1672  * amdgpu_device_ip_set_powergating_state - set the PG state
1673  *
1674  * @dev: amdgpu_device pointer
1675  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1676  * @state: powergating state (gate or ungate)
1677  *
1678  * Sets the requested powergating state for all instances of
1679  * the hardware IP specified.
1680  * Returns the error code from the last instance.
1681  */
1682 int amdgpu_device_ip_set_powergating_state(void *dev,
1683 					   enum amd_ip_block_type block_type,
1684 					   enum amd_powergating_state state)
1685 {
1686 	struct amdgpu_device *adev = dev;
1687 	int i, r = 0;
1688 
1689 	for (i = 0; i < adev->num_ip_blocks; i++) {
1690 		if (!adev->ip_blocks[i].status.valid)
1691 			continue;
1692 		if (adev->ip_blocks[i].version->type != block_type)
1693 			continue;
1694 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1695 			continue;
1696 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1697 			(void *)adev, state);
1698 		if (r)
1699 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1700 				  adev->ip_blocks[i].version->funcs->name, r);
1701 	}
1702 	return r;
1703 }
1704 
1705 /**
1706  * amdgpu_device_ip_get_clockgating_state - get the CG state
1707  *
1708  * @adev: amdgpu_device pointer
1709  * @flags: clockgating feature flags
1710  *
1711  * Walks the list of IPs on the device and updates the clockgating
1712  * flags for each IP.
1713  * Updates @flags with the feature flags for each hardware IP where
1714  * clockgating is enabled.
1715  */
1716 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1717 					    u64 *flags)
1718 {
1719 	int i;
1720 
1721 	for (i = 0; i < adev->num_ip_blocks; i++) {
1722 		if (!adev->ip_blocks[i].status.valid)
1723 			continue;
1724 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1725 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1726 	}
1727 }
1728 
1729 /**
1730  * amdgpu_device_ip_wait_for_idle - wait for idle
1731  *
1732  * @adev: amdgpu_device pointer
1733  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1734  *
 * Waits for the requested hardware IP to be idle.
1736  * Returns 0 for success or a negative error code on failure.
1737  */
1738 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1739 				   enum amd_ip_block_type block_type)
1740 {
1741 	int i, r;
1742 
1743 	for (i = 0; i < adev->num_ip_blocks; i++) {
1744 		if (!adev->ip_blocks[i].status.valid)
1745 			continue;
1746 		if (adev->ip_blocks[i].version->type == block_type) {
1747 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1748 			if (r)
1749 				return r;
1750 			break;
1751 		}
1752 	}
1753 	return 0;
1754 
1755 }
1756 
1757 /**
1758  * amdgpu_device_ip_is_idle - is the hardware IP idle
1759  *
1760  * @adev: amdgpu_device pointer
1761  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1762  *
1763  * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1765  */
1766 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1767 			      enum amd_ip_block_type block_type)
1768 {
1769 	int i;
1770 
1771 	for (i = 0; i < adev->num_ip_blocks; i++) {
1772 		if (!adev->ip_blocks[i].status.valid)
1773 			continue;
1774 		if (adev->ip_blocks[i].version->type == block_type)
1775 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1776 	}
1777 	return true;
1778 
1779 }
1780 
1781 /**
1782  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1783  *
1784  * @adev: amdgpu_device pointer
1785  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1786  *
1787  * Returns a pointer to the hardware IP block structure
1788  * if it exists for the asic, otherwise NULL.
1789  */
1790 struct amdgpu_ip_block *
1791 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1792 			      enum amd_ip_block_type type)
1793 {
1794 	int i;
1795 
1796 	for (i = 0; i < adev->num_ip_blocks; i++)
1797 		if (adev->ip_blocks[i].version->type == type)
1798 			return &adev->ip_blocks[i];
1799 
1800 	return NULL;
1801 }
1802 
1803 /**
 * amdgpu_device_ip_block_version_cmp - check an IP block's version
1805  *
1806  * @adev: amdgpu_device pointer
1807  * @type: enum amd_ip_block_type
1808  * @major: major version
1809  * @minor: minor version
1810  *
 * Returns 0 if the IP block's version is equal to or greater than the
 * requested version, 1 if it is smaller or the ip_block doesn't exist.
1813  */
1814 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1815 				       enum amd_ip_block_type type,
1816 				       u32 major, u32 minor)
1817 {
1818 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1819 
1820 	if (ip_block && ((ip_block->version->major > major) ||
1821 			((ip_block->version->major == major) &&
1822 			(ip_block->version->minor >= minor))))
1823 		return 0;
1824 
1825 	return 1;
1826 }
1827 
1828 /**
 * amdgpu_device_ip_block_add - add an IP block to the device
1830  *
1831  * @adev: amdgpu_device pointer
1832  * @ip_block_version: pointer to the IP to add
1833  *
1834  * Adds the IP block driver information to the collection of IPs
1835  * on the asic.
1836  */
1837 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1838 			       const struct amdgpu_ip_block_version *ip_block_version)
1839 {
1840 	if (!ip_block_version)
1841 		return -EINVAL;
1842 
1843 	switch (ip_block_version->type) {
1844 	case AMD_IP_BLOCK_TYPE_VCN:
1845 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1846 			return 0;
1847 		break;
1848 	case AMD_IP_BLOCK_TYPE_JPEG:
1849 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1850 			return 0;
1851 		break;
1852 	default:
1853 		break;
1854 	}
1855 
1856 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1857 		  ip_block_version->funcs->name);
1858 
1859 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1860 
1861 	return 0;
1862 }
1863 
1864 /**
1865  * amdgpu_device_enable_virtual_display - enable virtual display feature
1866  *
1867  * @adev: amdgpu_device pointer
1868  *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
1875  */
1876 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1877 {
1878 	adev->enable_virtual_display = false;
1879 
1880 	if (amdgpu_virtual_display) {
1881 		const char *pci_address_name = pci_name(adev->pdev);
1882 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1883 
1884 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1885 		pciaddstr_tmp = pciaddstr;
1886 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1887 			pciaddname = strsep(&pciaddname_tmp, ",");
1888 			if (!strcmp("all", pciaddname)
1889 			    || !strcmp(pci_address_name, pciaddname)) {
1890 				long num_crtc;
1891 				int res = -1;
1892 
1893 				adev->enable_virtual_display = true;
1894 
1895 				if (pciaddname_tmp)
1896 					res = kstrtol(pciaddname_tmp, 10,
1897 						      &num_crtc);
1898 
1899 				if (!res) {
1900 					if (num_crtc < 1)
1901 						num_crtc = 1;
1902 					if (num_crtc > 6)
1903 						num_crtc = 6;
1904 					adev->mode_info.num_crtc = num_crtc;
1905 				} else {
1906 					adev->mode_info.num_crtc = 1;
1907 				}
1908 				break;
1909 			}
1910 		}
1911 
1912 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1913 			 amdgpu_virtual_display, pci_address_name,
1914 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1915 
1916 		kfree(pciaddstr);
1917 	}
1918 }
1919 
1920 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1921 {
1922 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1923 		adev->mode_info.num_crtc = 1;
1924 		adev->enable_virtual_display = true;
1925 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1926 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1927 	}
1928 }
1929 
1930 /**
1931  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1932  *
1933  * @adev: amdgpu_device pointer
1934  *
1935  * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1937  * the asic.
1938  * Returns 0 on success, -EINVAL on failure.
1939  */
1940 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1941 {
1942 	const char *chip_name;
1943 	char fw_name[40];
1944 	int err;
1945 	const struct gpu_info_firmware_header_v1_0 *hdr;
1946 
1947 	adev->firmware.gpu_info_fw = NULL;
1948 
1949 	if (adev->mman.discovery_bin) {
1950 		/*
1951 		 * FIXME: The bounding box is still needed by Navi12, so
1952 		 * temporarily read it from gpu_info firmware. Should be dropped
1953 		 * when DAL no longer needs it.
1954 		 */
1955 		if (adev->asic_type != CHIP_NAVI12)
1956 			return 0;
1957 	}
1958 
1959 	switch (adev->asic_type) {
1960 	default:
1961 		return 0;
1962 	case CHIP_VEGA10:
1963 		chip_name = "vega10";
1964 		break;
1965 	case CHIP_VEGA12:
1966 		chip_name = "vega12";
1967 		break;
1968 	case CHIP_RAVEN:
1969 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1970 			chip_name = "raven2";
1971 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1972 			chip_name = "picasso";
1973 		else
1974 			chip_name = "raven";
1975 		break;
1976 	case CHIP_ARCTURUS:
1977 		chip_name = "arcturus";
1978 		break;
1979 	case CHIP_NAVI12:
1980 		chip_name = "navi12";
1981 		break;
1982 	}
1983 
1984 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1985 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1986 	if (err) {
1987 		dev_err(adev->dev,
1988 			"Failed to load gpu_info firmware \"%s\"\n",
1989 			fw_name);
1990 		goto out;
1991 	}
1992 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1993 	if (err) {
1994 		dev_err(adev->dev,
1995 			"Failed to validate gpu_info firmware \"%s\"\n",
1996 			fw_name);
1997 		goto out;
1998 	}
1999 
2000 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2001 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2002 
2003 	switch (hdr->version_major) {
2004 	case 1:
2005 	{
2006 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2007 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2008 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2009 
2010 		/*
		 * Should be dropped when DAL no longer needs it.
2012 		 */
2013 		if (adev->asic_type == CHIP_NAVI12)
2014 			goto parse_soc_bounding_box;
2015 
2016 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2017 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2018 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2019 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2020 		adev->gfx.config.max_texture_channel_caches =
2021 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2022 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2023 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2024 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2025 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2026 		adev->gfx.config.double_offchip_lds_buf =
2027 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2028 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2029 		adev->gfx.cu_info.max_waves_per_simd =
2030 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2031 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2032 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2033 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2034 		if (hdr->version_minor >= 1) {
2035 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2036 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2037 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2038 			adev->gfx.config.num_sc_per_sh =
2039 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2040 			adev->gfx.config.num_packer_per_sc =
2041 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2042 		}
2043 
2044 parse_soc_bounding_box:
2045 		/*
		 * The SoC bounding box info is not integrated into the discovery
		 * table, so we always need to parse it from the gpu info firmware
		 * if needed.
2048 		 */
2049 		if (hdr->version_minor == 2) {
2050 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2051 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2052 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2053 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2054 		}
2055 		break;
2056 	}
2057 	default:
2058 		dev_err(adev->dev,
2059 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2060 		err = -EINVAL;
2061 		goto out;
2062 	}
2063 out:
2064 	return err;
2065 }
2066 
2067 /**
2068  * amdgpu_device_ip_early_init - run early init for hardware IPs
2069  *
2070  * @adev: amdgpu_device pointer
2071  *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
2074  * is the first stage in initializing the asic.
2075  * Returns 0 on success, negative error code on failure.
2076  */
2077 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2078 {
2079 	struct drm_device *dev = adev_to_drm(adev);
2080 	struct pci_dev *parent;
2081 	int i, r;
2082 
2083 	amdgpu_device_enable_virtual_display(adev);
2084 
2085 	if (amdgpu_sriov_vf(adev)) {
2086 		r = amdgpu_virt_request_full_gpu(adev, true);
2087 		if (r)
2088 			return r;
2089 	}
2090 
2091 	switch (adev->asic_type) {
2092 #ifdef CONFIG_DRM_AMDGPU_SI
2093 	case CHIP_VERDE:
2094 	case CHIP_TAHITI:
2095 	case CHIP_PITCAIRN:
2096 	case CHIP_OLAND:
2097 	case CHIP_HAINAN:
2098 		adev->family = AMDGPU_FAMILY_SI;
2099 		r = si_set_ip_blocks(adev);
2100 		if (r)
2101 			return r;
2102 		break;
2103 #endif
2104 #ifdef CONFIG_DRM_AMDGPU_CIK
2105 	case CHIP_BONAIRE:
2106 	case CHIP_HAWAII:
2107 	case CHIP_KAVERI:
2108 	case CHIP_KABINI:
2109 	case CHIP_MULLINS:
2110 		if (adev->flags & AMD_IS_APU)
2111 			adev->family = AMDGPU_FAMILY_KV;
2112 		else
2113 			adev->family = AMDGPU_FAMILY_CI;
2114 
2115 		r = cik_set_ip_blocks(adev);
2116 		if (r)
2117 			return r;
2118 		break;
2119 #endif
2120 	case CHIP_TOPAZ:
2121 	case CHIP_TONGA:
2122 	case CHIP_FIJI:
2123 	case CHIP_POLARIS10:
2124 	case CHIP_POLARIS11:
2125 	case CHIP_POLARIS12:
2126 	case CHIP_VEGAM:
2127 	case CHIP_CARRIZO:
2128 	case CHIP_STONEY:
2129 		if (adev->flags & AMD_IS_APU)
2130 			adev->family = AMDGPU_FAMILY_CZ;
2131 		else
2132 			adev->family = AMDGPU_FAMILY_VI;
2133 
2134 		r = vi_set_ip_blocks(adev);
2135 		if (r)
2136 			return r;
2137 		break;
2138 	default:
2139 		r = amdgpu_discovery_set_ip_blocks(adev);
2140 		if (r)
2141 			return r;
2142 		break;
2143 	}
2144 
2145 	if (amdgpu_has_atpx() &&
2146 	    (amdgpu_is_atpx_hybrid() ||
2147 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2148 	    ((adev->flags & AMD_IS_APU) == 0) &&
2149 	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2150 		adev->flags |= AMD_IS_PX;
2151 
2152 	if (!(adev->flags & AMD_IS_APU)) {
2153 		parent = pci_upstream_bridge(adev->pdev);
2154 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2155 	}
2156 
2157 	amdgpu_amdkfd_device_probe(adev);
2158 
2159 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2160 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2161 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2162 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2163 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2164 
2165 	for (i = 0; i < adev->num_ip_blocks; i++) {
2166 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2167 			DRM_ERROR("disabled ip block: %d <%s>\n",
2168 				  i, adev->ip_blocks[i].version->funcs->name);
2169 			adev->ip_blocks[i].status.valid = false;
2170 		} else {
2171 			if (adev->ip_blocks[i].version->funcs->early_init) {
2172 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2173 				if (r == -ENOENT) {
2174 					adev->ip_blocks[i].status.valid = false;
2175 				} else if (r) {
2176 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2177 						  adev->ip_blocks[i].version->funcs->name, r);
2178 					return r;
2179 				} else {
2180 					adev->ip_blocks[i].status.valid = true;
2181 				}
2182 			} else {
2183 				adev->ip_blocks[i].status.valid = true;
2184 			}
2185 		}
2186 		/* get the vbios after the asic_funcs are set up */
2187 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2188 			r = amdgpu_device_parse_gpu_info_fw(adev);
2189 			if (r)
2190 				return r;
2191 
2192 			/* Read BIOS */
2193 			if (!amdgpu_get_bios(adev))
2194 				return -EINVAL;
2195 
2196 			r = amdgpu_atombios_init(adev);
2197 			if (r) {
2198 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2199 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2200 				return r;
2201 			}
2202 
			/* get pf2vf msg info at its earliest time */
2204 			if (amdgpu_sriov_vf(adev))
2205 				amdgpu_virt_init_data_exchange(adev);
2206 
2207 		}
2208 	}
2209 
2210 	adev->cg_flags &= amdgpu_cg_mask;
2211 	adev->pg_flags &= amdgpu_pg_mask;
2212 
2213 	return 0;
2214 }
2215 
2216 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2217 {
2218 	int i, r;
2219 
2220 	for (i = 0; i < adev->num_ip_blocks; i++) {
2221 		if (!adev->ip_blocks[i].status.sw)
2222 			continue;
2223 		if (adev->ip_blocks[i].status.hw)
2224 			continue;
2225 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2226 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2227 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2228 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2229 			if (r) {
2230 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2231 					  adev->ip_blocks[i].version->funcs->name, r);
2232 				return r;
2233 			}
2234 			adev->ip_blocks[i].status.hw = true;
2235 		}
2236 	}
2237 
2238 	return 0;
2239 }
2240 
2241 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2242 {
2243 	int i, r;
2244 
2245 	for (i = 0; i < adev->num_ip_blocks; i++) {
2246 		if (!adev->ip_blocks[i].status.sw)
2247 			continue;
2248 		if (adev->ip_blocks[i].status.hw)
2249 			continue;
2250 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2251 		if (r) {
2252 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2253 				  adev->ip_blocks[i].version->funcs->name, r);
2254 			return r;
2255 		}
2256 		adev->ip_blocks[i].status.hw = true;
2257 	}
2258 
2259 	return 0;
2260 }
2261 
2262 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2263 {
2264 	int r = 0;
2265 	int i;
2266 	uint32_t smu_version;
2267 
2268 	if (adev->asic_type >= CHIP_VEGA10) {
2269 		for (i = 0; i < adev->num_ip_blocks; i++) {
2270 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2271 				continue;
2272 
2273 			if (!adev->ip_blocks[i].status.sw)
2274 				continue;
2275 
			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw)
2278 				break;
2279 
2280 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2281 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2282 				if (r) {
2283 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2284 							  adev->ip_blocks[i].version->funcs->name, r);
2285 					return r;
2286 				}
2287 			} else {
2288 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2289 				if (r) {
2290 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2291 							  adev->ip_blocks[i].version->funcs->name, r);
2292 					return r;
2293 				}
2294 			}
2295 
2296 			adev->ip_blocks[i].status.hw = true;
2297 			break;
2298 		}
2299 	}
2300 
2301 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2302 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2303 
2304 	return r;
2305 }
2306 
2307 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2308 {
2309 	long timeout;
2310 	int r, i;
2311 
2312 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2313 		struct amdgpu_ring *ring = adev->rings[i];
2314 
		/* No need to set up the GPU scheduler for rings that don't need it */
2316 		if (!ring || ring->no_scheduler)
2317 			continue;
2318 
2319 		switch (ring->funcs->type) {
2320 		case AMDGPU_RING_TYPE_GFX:
2321 			timeout = adev->gfx_timeout;
2322 			break;
2323 		case AMDGPU_RING_TYPE_COMPUTE:
2324 			timeout = adev->compute_timeout;
2325 			break;
2326 		case AMDGPU_RING_TYPE_SDMA:
2327 			timeout = adev->sdma_timeout;
2328 			break;
2329 		default:
2330 			timeout = adev->video_timeout;
2331 			break;
2332 		}
2333 
2334 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2335 				   ring->num_hw_submission, amdgpu_job_hang_limit,
2336 				   timeout, adev->reset_domain->wq,
2337 				   ring->sched_score, ring->name,
2338 				   adev->dev);
2339 		if (r) {
2340 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2341 				  ring->name);
2342 			return r;
2343 		}
2344 	}
2345 
2346 	return 0;
2347 }
2348 
2349 
2350 /**
2351  * amdgpu_device_ip_init - run init for hardware IPs
2352  *
2353  * @adev: amdgpu_device pointer
2354  *
2355  * Main initialization pass for hardware IPs.  The list of all the hardware
2356  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2357  * are run.  sw_init initializes the software state associated with each IP
2358  * and hw_init initializes the hardware associated with each IP.
2359  * Returns 0 on success, negative error code on failure.
2360  */
2361 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2362 {
2363 	int i, r;
2364 
2365 	r = amdgpu_ras_init(adev);
2366 	if (r)
2367 		return r;
2368 
2369 	for (i = 0; i < adev->num_ip_blocks; i++) {
2370 		if (!adev->ip_blocks[i].status.valid)
2371 			continue;
2372 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2373 		if (r) {
2374 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2375 				  adev->ip_blocks[i].version->funcs->name, r);
2376 			goto init_failed;
2377 		}
2378 		adev->ip_blocks[i].status.sw = true;
2379 
2380 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2381 			/* need to do common hw init early so everything is set up for gmc */
2382 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2383 			if (r) {
2384 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2385 				goto init_failed;
2386 			}
2387 			adev->ip_blocks[i].status.hw = true;
2388 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2389 			/* need to do gmc hw init early so we can allocate gpu mem */
2390 			/* Try to reserve bad pages early */
2391 			if (amdgpu_sriov_vf(adev))
2392 				amdgpu_virt_exchange_data(adev);
2393 
2394 			r = amdgpu_device_vram_scratch_init(adev);
2395 			if (r) {
2396 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2397 				goto init_failed;
2398 			}
2399 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2400 			if (r) {
2401 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2402 				goto init_failed;
2403 			}
2404 			r = amdgpu_device_wb_init(adev);
2405 			if (r) {
2406 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2407 				goto init_failed;
2408 			}
2409 			adev->ip_blocks[i].status.hw = true;
2410 
2411 			/* right after GMC hw init, we create CSA */
2412 			if (amdgpu_mcbp) {
2413 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2414 								AMDGPU_GEM_DOMAIN_VRAM,
2415 								AMDGPU_CSA_SIZE);
2416 				if (r) {
2417 					DRM_ERROR("allocate CSA failed %d\n", r);
2418 					goto init_failed;
2419 				}
2420 			}
2421 		}
2422 	}
2423 
2424 	if (amdgpu_sriov_vf(adev))
2425 		amdgpu_virt_init_data_exchange(adev);
2426 
2427 	r = amdgpu_ib_pool_init(adev);
2428 	if (r) {
2429 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2430 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2431 		goto init_failed;
2432 	}
2433 
	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
2435 	if (r)
2436 		goto init_failed;
2437 
2438 	r = amdgpu_device_ip_hw_init_phase1(adev);
2439 	if (r)
2440 		goto init_failed;
2441 
2442 	r = amdgpu_device_fw_loading(adev);
2443 	if (r)
2444 		goto init_failed;
2445 
2446 	r = amdgpu_device_ip_hw_init_phase2(adev);
2447 	if (r)
2448 		goto init_failed;
2449 
2450 	/*
2451 	 * retired pages will be loaded from eeprom and reserved here,
2452 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2453 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2454 	 * for I2C communication which only true at this point.
2455 	 *
2456 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2457 	 * failure from bad gpu situation and stop amdgpu init process
2458 	 * accordingly. For other failed cases, it will still release all
2459 	 * the resource and print error message, rather than returning one
2460 	 * negative value to upper level.
2461 	 *
2462 	 * Note: theoretically, this should be called before all vram allocations
2463 	 * to protect retired page from abusing
2464 	 */
2465 	r = amdgpu_ras_recovery_init(adev);
2466 	if (r)
2467 		goto init_failed;
2468 
2469 	/**
2470 	 * In case of XGMI grab extra reference for reset domain for this device
2471 	 */
2472 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2473 		if (amdgpu_xgmi_add_device(adev) == 0) {
2474 			if (!amdgpu_sriov_vf(adev)) {
2475 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2476 
2477 				if (!hive->reset_domain ||
2478 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2479 					r = -ENOENT;
2480 					amdgpu_put_xgmi_hive(hive);
2481 					goto init_failed;
2482 				}
2483 
2484 				/* Drop the early temporary reset domain we created for device */
2485 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2486 				adev->reset_domain = hive->reset_domain;
2487 				amdgpu_put_xgmi_hive(hive);
2488 			}
2489 		}
2490 	}
2491 
2492 	r = amdgpu_device_init_schedulers(adev);
2493 	if (r)
2494 		goto init_failed;
2495 
2496 	/* Don't init kfd if whole hive need to be reset during init */
2497 	if (!adev->gmc.xgmi.pending_reset)
2498 		amdgpu_amdkfd_device_init(adev);
2499 
2500 	amdgpu_fru_get_product_info(adev);
2501 
2502 init_failed:
2503 	if (amdgpu_sriov_vf(adev))
2504 		amdgpu_virt_release_full_gpu(adev, true);
2505 
2506 	return r;
2507 }
2508 
2509 /**
2510  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2511  *
2512  * @adev: amdgpu_device pointer
2513  *
2514  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2515  * this function before a GPU reset.  If the value is retained after a
 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2517  */
2518 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2519 {
2520 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2521 }
2522 
2523 /**
2524  * amdgpu_device_check_vram_lost - check if vram is valid
2525  *
2526  * @adev: amdgpu_device pointer
2527  *
2528  * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM have been lost or not.
2531  * returns true if vram is lost, false if not.
2532  */
2533 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2534 {
2535 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2536 			AMDGPU_RESET_MAGIC_NUM))
2537 		return true;
2538 
2539 	if (!amdgpu_in_reset(adev))
2540 		return false;
2541 
2542 	/*
2543 	 * For all ASICs with baco/mode1 reset, the VRAM is
2544 	 * always assumed to be lost.
2545 	 */
2546 	switch (amdgpu_asic_reset_method(adev)) {
2547 	case AMD_RESET_METHOD_BACO:
2548 	case AMD_RESET_METHOD_MODE1:
2549 		return true;
2550 	default:
2551 		return false;
2552 	}
2553 }
2554 
2555 /**
2556  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2557  *
2558  * @adev: amdgpu_device pointer
2559  * @state: clockgating state (gate or ungate)
2560  *
2561  * The list of all the hardware IPs that make up the asic is walked and the
2562  * set_clockgating_state callbacks are run.
 * The late init pass enables clockgating for hardware IPs; the fini and
 * suspend passes disable clockgating for hardware IPs.
2565  * Returns 0 on success, negative error code on failure.
2566  */
2567 
2568 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2569 			       enum amd_clockgating_state state)
2570 {
2571 	int i, j, r;
2572 
2573 	if (amdgpu_emu_mode == 1)
2574 		return 0;
2575 
2576 	for (j = 0; j < adev->num_ip_blocks; j++) {
2577 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2578 		if (!adev->ip_blocks[i].status.late_initialized)
2579 			continue;
2580 		/* skip CG for GFX on S0ix */
2581 		if (adev->in_s0ix &&
2582 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2583 			continue;
		/* skip CG for UVD/VCE/VCN/JPEG, it's handled specially */
2585 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2586 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2587 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2588 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2589 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2590 			/* enable clockgating to save power */
2591 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2592 										     state);
2593 			if (r) {
2594 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2595 					  adev->ip_blocks[i].version->funcs->name, r);
2596 				return r;
2597 			}
2598 		}
2599 	}
2600 
2601 	return 0;
2602 }
2603 
2604 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2605 			       enum amd_powergating_state state)
2606 {
2607 	int i, j, r;
2608 
2609 	if (amdgpu_emu_mode == 1)
2610 		return 0;
2611 
2612 	for (j = 0; j < adev->num_ip_blocks; j++) {
2613 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2614 		if (!adev->ip_blocks[i].status.late_initialized)
2615 			continue;
2616 		/* skip PG for GFX on S0ix */
2617 		if (adev->in_s0ix &&
2618 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2619 			continue;
		/* skip PG for UVD/VCE/VCN/JPEG, it's handled specially */
2621 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2622 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2623 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2624 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2625 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2626 			/* enable powergating to save power */
2627 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2628 											state);
2629 			if (r) {
2630 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2631 					  adev->ip_blocks[i].version->funcs->name, r);
2632 				return r;
2633 			}
2634 		}
2635 	}
2636 	return 0;
2637 }
2638 
2639 static int amdgpu_device_enable_mgpu_fan_boost(void)
2640 {
2641 	struct amdgpu_gpu_instance *gpu_ins;
2642 	struct amdgpu_device *adev;
2643 	int i, ret = 0;
2644 
2645 	mutex_lock(&mgpu_info.mutex);
2646 
2647 	/*
2648 	 * MGPU fan boost feature should be enabled
2649 	 * only when there are two or more dGPUs in
2650 	 * the system
2651 	 */
2652 	if (mgpu_info.num_dgpu < 2)
2653 		goto out;
2654 
2655 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2656 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2657 		adev = gpu_ins->adev;
2658 		if (!(adev->flags & AMD_IS_APU) &&
2659 		    !gpu_ins->mgpu_fan_enabled) {
2660 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2661 			if (ret)
2662 				break;
2663 
2664 			gpu_ins->mgpu_fan_enabled = 1;
2665 		}
2666 	}
2667 
2668 out:
2669 	mutex_unlock(&mgpu_info.mutex);
2670 
2671 	return ret;
2672 }
2673 
2674 /**
2675  * amdgpu_device_ip_late_init - run late init for hardware IPs
2676  *
2677  * @adev: amdgpu_device pointer
2678  *
2679  * Late initialization pass for hardware IPs.  The list of all the hardware
2680  * IPs that make up the asic is walked and the late_init callbacks are run.
2681  * late_init covers any special initialization that an IP requires
 * after all of them have been initialized or something that needs to happen
2683  * late in the init process.
2684  * Returns 0 on success, negative error code on failure.
2685  */
2686 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2687 {
2688 	struct amdgpu_gpu_instance *gpu_instance;
2689 	int i = 0, r;
2690 
2691 	for (i = 0; i < adev->num_ip_blocks; i++) {
2692 		if (!adev->ip_blocks[i].status.hw)
2693 			continue;
2694 		if (adev->ip_blocks[i].version->funcs->late_init) {
2695 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2696 			if (r) {
2697 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2698 					  adev->ip_blocks[i].version->funcs->name, r);
2699 				return r;
2700 			}
2701 		}
2702 		adev->ip_blocks[i].status.late_initialized = true;
2703 	}
2704 
2705 	r = amdgpu_ras_late_init(adev);
2706 	if (r) {
2707 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2708 		return r;
2709 	}
2710 
2711 	amdgpu_ras_set_error_query_ready(adev, true);
2712 
2713 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2714 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2715 
2716 	amdgpu_device_fill_reset_magic(adev);
2717 
2718 	r = amdgpu_device_enable_mgpu_fan_boost();
2719 	if (r)
2720 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2721 
	/* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
	if (amdgpu_passthrough(adev) &&
	    ((adev->asic_type == CHIP_ARCTURUS &&
	      adev->gmc.xgmi.num_physical_nodes > 1) ||
	     adev->asic_type == CHIP_ALDEBARAN))
2725 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2726 
2727 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2728 		mutex_lock(&mgpu_info.mutex);
2729 
2730 		/*
2731 		 * Reset device p-state to low as this was booted with high.
2732 		 *
2733 		 * This should be performed only after all devices from the same
2734 		 * hive get initialized.
2735 		 *
		 * However, the number of devices in the hive is not known in
		 * advance, as it is counted one by one during device
		 * initialization.
		 *
		 * So, we wait for all XGMI interlinked devices to be initialized.
2740 		 * This may bring some delays as those devices may come from
2741 		 * different hives. But that should be OK.
2742 		 */
2743 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2744 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2745 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2746 				if (gpu_instance->adev->flags & AMD_IS_APU)
2747 					continue;
2748 
2749 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2750 						AMDGPU_XGMI_PSTATE_MIN);
2751 				if (r) {
2752 					DRM_ERROR("pstate setting failed (%d).\n", r);
2753 					break;
2754 				}
2755 			}
2756 		}
2757 
2758 		mutex_unlock(&mgpu_info.mutex);
2759 	}
2760 
2761 	return 0;
2762 }
2763 
2764 /**
2765  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2766  *
2767  * @adev: amdgpu_device pointer
2768  *
 * For ASICs that need to disable the SMC first
2770  */
2771 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2772 {
2773 	int i, r;
2774 
2775 	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2776 		return;
2777 
2778 	for (i = 0; i < adev->num_ip_blocks; i++) {
2779 		if (!adev->ip_blocks[i].status.hw)
2780 			continue;
2781 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2782 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2783 			/* XXX handle errors */
2784 			if (r) {
2785 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2786 					  adev->ip_blocks[i].version->funcs->name, r);
2787 			}
2788 			adev->ip_blocks[i].status.hw = false;
2789 			break;
2790 		}
2791 	}
2792 }
2793 
2794 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2795 {
2796 	int i, r;
2797 
2798 	for (i = 0; i < adev->num_ip_blocks; i++) {
2799 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2800 			continue;
2801 
2802 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2803 		if (r) {
2804 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2805 				  adev->ip_blocks[i].version->funcs->name, r);
2806 		}
2807 	}
2808 
2809 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2810 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2811 
2812 	amdgpu_amdkfd_suspend(adev, false);
2813 
	/* Workaround for ASICs that need to disable the SMC first */
2815 	amdgpu_device_smu_fini_early(adev);
2816 
2817 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2818 		if (!adev->ip_blocks[i].status.hw)
2819 			continue;
2820 
2821 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2822 		/* XXX handle errors */
2823 		if (r) {
2824 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2825 				  adev->ip_blocks[i].version->funcs->name, r);
2826 		}
2827 
2828 		adev->ip_blocks[i].status.hw = false;
2829 	}
2830 
2831 	if (amdgpu_sriov_vf(adev)) {
2832 		if (amdgpu_virt_release_full_gpu(adev, false))
2833 			DRM_ERROR("failed to release exclusive mode on fini\n");
2834 	}
2835 
2836 	return 0;
2837 }
2838 
2839 /**
2840  * amdgpu_device_ip_fini - run fini for hardware IPs
2841  *
2842  * @adev: amdgpu_device pointer
2843  *
2844  * Main teardown pass for hardware IPs.  The list of all the hardware
2845  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2846  * are run.  hw_fini tears down the hardware associated with each IP
2847  * and sw_fini tears down any software state associated with each IP.
2848  * Returns 0 on success, negative error code on failure.
2849  */
2850 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2851 {
2852 	int i, r;
2853 
2854 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2855 		amdgpu_virt_release_ras_err_handler_data(adev);
2856 
2857 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2858 		amdgpu_xgmi_remove_device(adev);
2859 
2860 	amdgpu_amdkfd_device_fini_sw(adev);
2861 
2862 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2863 		if (!adev->ip_blocks[i].status.sw)
2864 			continue;
2865 
2866 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2867 			amdgpu_ucode_free_bo(adev);
2868 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2869 			amdgpu_device_wb_fini(adev);
2870 			amdgpu_device_vram_scratch_fini(adev);
2871 			amdgpu_ib_pool_fini(adev);
2872 		}
2873 
2874 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2875 		/* XXX handle errors */
2876 		if (r) {
2877 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2878 				  adev->ip_blocks[i].version->funcs->name, r);
2879 		}
2880 		adev->ip_blocks[i].status.sw = false;
2881 		adev->ip_blocks[i].status.valid = false;
2882 	}
2883 
2884 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2885 		if (!adev->ip_blocks[i].status.late_initialized)
2886 			continue;
2887 		if (adev->ip_blocks[i].version->funcs->late_fini)
2888 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2889 		adev->ip_blocks[i].status.late_initialized = false;
2890 	}
2891 
2892 	amdgpu_ras_fini(adev);
2893 
2894 	return 0;
2895 }
2896 
2897 /**
2898  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2899  *
2900  * @work: work_struct.
2901  */
2902 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2903 {
2904 	struct amdgpu_device *adev =
2905 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2906 	int r;
2907 
2908 	r = amdgpu_ib_ring_tests(adev);
2909 	if (r)
2910 		DRM_ERROR("ib ring test failed (%d).\n", r);
2911 }
2912 
2913 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2914 {
2915 	struct amdgpu_device *adev =
2916 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2917 
2918 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2919 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2920 
2921 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2922 		adev->gfx.gfx_off_state = true;
2923 }
2924 
2925 /**
2926  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2927  *
2928  * @adev: amdgpu_device pointer
2929  *
2930  * Main suspend function for hardware IPs.  The list of all the hardware
2931  * IPs that make up the asic is walked, clockgating is disabled and the
2932  * suspend callbacks are run.  suspend puts the hardware and software state
2933  * in each IP into a state suitable for suspend.
2934  * Returns 0 on success, negative error code on failure.
2935  */
2936 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2937 {
2938 	int i, r;
2939 
2940 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2941 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2942 
2943 	/*
2944 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2945 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2946 	 * scenario. Add the missing df cstate disablement here.
2947 	 */
2948 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2949 		dev_warn(adev->dev, "Failed to disallow df cstate");
2950 
2951 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2952 		if (!adev->ip_blocks[i].status.valid)
2953 			continue;
2954 
2955 		/* displays are handled separately */
2956 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2957 			continue;
2958 
2960 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2961 		/* XXX handle errors */
2962 		if (r) {
2963 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2964 				  adev->ip_blocks[i].version->funcs->name, r);
2965 			return r;
2966 		}
2967 
2968 		adev->ip_blocks[i].status.hw = false;
2969 	}
2970 
2971 	return 0;
2972 }
2973 
2974 /**
2975  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2976  *
2977  * @adev: amdgpu_device pointer
2978  *
2979  * Main suspend function for hardware IPs.  The list of all the hardware
2980  * IPs that make up the asic is walked, clockgating is disabled and the
2981  * suspend callbacks are run.  suspend puts the hardware and software state
2982  * in each IP into a state suitable for suspend.
2983  * Returns 0 on success, negative error code on failure.
2984  */
2985 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2986 {
2987 	int i, r;
2988 
2989 	if (adev->in_s0ix)
2990 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
2991 
2992 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2993 		if (!adev->ip_blocks[i].status.valid)
2994 			continue;
2995 		/* displays are handled in phase1 */
2996 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2997 			continue;
2998 		/* PSP lost connection when err_event_athub occurs */
2999 		if (amdgpu_ras_intr_triggered() &&
3000 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3001 			adev->ip_blocks[i].status.hw = false;
3002 			continue;
3003 		}
3004 
		/* skip unnecessary suspend if we have not initialized them yet */
3006 		if (adev->gmc.xgmi.pending_reset &&
3007 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3008 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3009 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3010 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3011 			adev->ip_blocks[i].status.hw = false;
3012 			continue;
3013 		}
3014 
3015 		/* skip suspend of gfx and psp for S0ix
3016 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3017 		 * like at runtime. PSP is also part of the always on hardware
3018 		 * so no need to suspend it.
3019 		 */
3020 		if (adev->in_s0ix &&
3021 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3022 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
3023 			continue;
3024 
3026 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3027 		/* XXX handle errors */
3028 		if (r) {
3029 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3030 				  adev->ip_blocks[i].version->funcs->name, r);
3031 		}
3032 		adev->ip_blocks[i].status.hw = false;
3033 		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
3035 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3036 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3037 				if (r) {
3038 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3039 							adev->mp1_state, r);
3040 					return r;
3041 				}
3042 			}
3043 		}
3044 	}
3045 
3046 	return 0;
3047 }
3048 
3049 /**
3050  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3051  *
3052  * @adev: amdgpu_device pointer
3053  *
3054  * Main suspend function for hardware IPs.  The list of all the hardware
3055  * IPs that make up the asic is walked, clockgating is disabled and the
3056  * suspend callbacks are run.  suspend puts the hardware and software state
3057  * in each IP into a state suitable for suspend.
3058  * Returns 0 on success, negative error code on failure.
3059  */
3060 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3061 {
3062 	int r;
3063 
3064 	if (amdgpu_sriov_vf(adev)) {
3065 		amdgpu_virt_fini_data_exchange(adev);
3066 		amdgpu_virt_request_full_gpu(adev, false);
3067 	}
3068 
3069 	r = amdgpu_device_ip_suspend_phase1(adev);
3070 	if (r)
3071 		return r;
3072 	r = amdgpu_device_ip_suspend_phase2(adev);
3073 
3074 	if (amdgpu_sriov_vf(adev))
3075 		amdgpu_virt_release_full_gpu(adev, false);
3076 
3077 	return r;
3078 }
3079 
3080 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3081 {
3082 	int i, r;
3083 
3084 	static enum amd_ip_block_type ip_order[] = {
3085 		AMD_IP_BLOCK_TYPE_COMMON,
3086 		AMD_IP_BLOCK_TYPE_GMC,
3087 		AMD_IP_BLOCK_TYPE_PSP,
3088 		AMD_IP_BLOCK_TYPE_IH,
3089 	};
3090 
3091 	for (i = 0; i < adev->num_ip_blocks; i++) {
3092 		int j;
3093 		struct amdgpu_ip_block *block;
3094 
3095 		block = &adev->ip_blocks[i];
3096 		block->status.hw = false;
3097 
3098 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3099 
3100 			if (block->version->type != ip_order[j] ||
3101 				!block->status.valid)
3102 				continue;
3103 
3104 			r = block->version->funcs->hw_init(adev);
3105 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3106 			if (r)
3107 				return r;
3108 			block->status.hw = true;
3109 		}
3110 	}
3111 
3112 	return 0;
3113 }
3114 
3115 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3116 {
3117 	int i, r;
3118 
3119 	static enum amd_ip_block_type ip_order[] = {
3120 		AMD_IP_BLOCK_TYPE_SMC,
3121 		AMD_IP_BLOCK_TYPE_DCE,
3122 		AMD_IP_BLOCK_TYPE_GFX,
3123 		AMD_IP_BLOCK_TYPE_SDMA,
3124 		AMD_IP_BLOCK_TYPE_UVD,
3125 		AMD_IP_BLOCK_TYPE_VCE,
3126 		AMD_IP_BLOCK_TYPE_VCN
3127 	};
3128 
3129 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3130 		int j;
3131 		struct amdgpu_ip_block *block;
3132 
3133 		for (j = 0; j < adev->num_ip_blocks; j++) {
3134 			block = &adev->ip_blocks[j];
3135 
3136 			if (block->version->type != ip_order[i] ||
3137 				!block->status.valid ||
3138 				block->status.hw)
3139 				continue;
3140 
3141 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3142 				r = block->version->funcs->resume(adev);
3143 			else
3144 				r = block->version->funcs->hw_init(adev);
3145 
3146 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3147 			if (r)
3148 				return r;
3149 			block->status.hw = true;
3150 		}
3151 	}
3152 
3153 	return 0;
3154 }
3155 
3156 /**
3157  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3158  *
3159  * @adev: amdgpu_device pointer
3160  *
3161  * First resume function for hardware IPs.  The list of all the hardware
3162  * IPs that make up the asic is walked and the resume callbacks are run for
3163  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3164  * after a suspend and updates the software state as necessary.  This
3165  * function is also used for restoring the GPU after a GPU reset.
3166  * Returns 0 on success, negative error code on failure.
3167  */
3168 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3169 {
3170 	int i, r;
3171 
3172 	for (i = 0; i < adev->num_ip_blocks; i++) {
3173 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3174 			continue;
3175 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3176 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3177 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3178 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3179 
3180 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3181 			if (r) {
3182 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3183 					  adev->ip_blocks[i].version->funcs->name, r);
3184 				return r;
3185 			}
3186 			adev->ip_blocks[i].status.hw = true;
3187 		}
3188 	}
3189 
3190 	return 0;
3191 }
3192 
3193 /**
3194  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3195  *
3196  * @adev: amdgpu_device pointer
3197  *
 * Second resume function for hardware IPs.  The list of all the hardware
3199  * IPs that make up the asic is walked and the resume callbacks are run for
3200  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3201  * functional state after a suspend and updates the software state as
3202  * necessary.  This function is also used for restoring the GPU after a GPU
3203  * reset.
3204  * Returns 0 on success, negative error code on failure.
3205  */
3206 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3207 {
3208 	int i, r;
3209 
3210 	for (i = 0; i < adev->num_ip_blocks; i++) {
3211 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3212 			continue;
3213 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3214 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3215 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3216 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3217 			continue;
3218 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3219 		if (r) {
3220 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3221 				  adev->ip_blocks[i].version->funcs->name, r);
3222 			return r;
3223 		}
3224 		adev->ip_blocks[i].status.hw = true;
3225 
3226 		if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3227 			/* disable gfxoff for IP resume. The gfxoff will be re-enabled in
3228 			 * amdgpu_device_resume() after IP resume.
3229 			 */
3230 			amdgpu_gfx_off_ctrl(adev, false);
3231 			DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
3232 		}
3233 
3234 	}
3235 
3236 	return 0;
3237 }
3238 
3239 /**
3240  * amdgpu_device_ip_resume - run resume for hardware IPs
3241  *
3242  * @adev: amdgpu_device pointer
3243  *
3244  * Main resume function for hardware IPs.  The hardware IPs
 * are split into two resume functions because they are also used in
 * recovering from a GPU reset and some additional steps need to be
 * taken between them.  In this case (S3/S4) they are run sequentially.
3249  * Returns 0 on success, negative error code on failure.
3250  */
3251 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3252 {
3253 	int r;
3254 
3255 	r = amdgpu_amdkfd_resume_iommu(adev);
3256 	if (r)
3257 		return r;
3258 
3259 	r = amdgpu_device_ip_resume_phase1(adev);
3260 	if (r)
3261 		return r;
3262 
3263 	r = amdgpu_device_fw_loading(adev);
3264 	if (r)
3265 		return r;
3266 
3267 	r = amdgpu_device_ip_resume_phase2(adev);
3268 
3269 	return r;
3270 }
3271 
3272 /**
3273  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3274  *
3275  * @adev: amdgpu_device pointer
3276  *
3277  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3278  */
3279 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3280 {
3281 	if (amdgpu_sriov_vf(adev)) {
3282 		if (adev->is_atom_fw) {
3283 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3284 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3285 		} else {
3286 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3287 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3288 		}
3289 
3290 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3291 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3292 	}
3293 }
3294 
3295 /**
3296  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3297  *
3298  * @asic_type: AMD asic type
3299  *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
3301  * returns true if DC has support, false if not.
3302  */
3303 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3304 {
3305 	switch (asic_type) {
3306 #ifdef CONFIG_DRM_AMDGPU_SI
3307 	case CHIP_HAINAN:
3308 #endif
3309 	case CHIP_TOPAZ:
3310 		/* chips with no display hardware */
3311 		return false;
3312 #if defined(CONFIG_DRM_AMD_DC)
3313 	case CHIP_TAHITI:
3314 	case CHIP_PITCAIRN:
3315 	case CHIP_VERDE:
3316 	case CHIP_OLAND:
3317 		/*
3318 		 * We have systems in the wild with these ASICs that require
3319 		 * LVDS and VGA support which is not supported with DC.
3320 		 *
3321 		 * Fallback to the non-DC driver here by default so as not to
3322 		 * cause regressions.
3323 		 */
3324 #if defined(CONFIG_DRM_AMD_DC_SI)
3325 		return amdgpu_dc > 0;
3326 #else
3327 		return false;
3328 #endif
3329 	case CHIP_BONAIRE:
3330 	case CHIP_KAVERI:
3331 	case CHIP_KABINI:
3332 	case CHIP_MULLINS:
3333 		/*
3334 		 * We have systems in the wild with these ASICs that require
3335 		 * VGA support which is not supported with DC.
3336 		 *
3337 		 * Fallback to the non-DC driver here by default so as not to
3338 		 * cause regressions.
3339 		 */
3340 		return amdgpu_dc > 0;
3341 	default:
3342 		return amdgpu_dc != 0;
3343 #else
3344 	default:
3345 		if (amdgpu_dc > 0)
3346 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3347 					 "but isn't supported by ASIC, ignoring\n");
3348 		return false;
3349 #endif
3350 	}
3351 }
3352 
3353 /**
3354  * amdgpu_device_has_dc_support - check if dc is supported
3355  *
3356  * @adev: amdgpu_device pointer
3357  *
3358  * Returns true for supported, false for not supported
3359  */
3360 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3361 {
3362 	if (adev->enable_virtual_display ||
3363 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3364 		return false;
3365 
3366 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3367 }
3368 
3369 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3370 {
3371 	struct amdgpu_device *adev =
3372 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3373 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3374 
3375 	/* It's a bug to not have a hive within this function */
3376 	if (WARN_ON(!hive))
3377 		return;
3378 
3379 	/*
3380 	 * Use task barrier to synchronize all xgmi reset works across the
3381 	 * hive. task_barrier_enter and task_barrier_exit will block
3382 	 * until all the threads running the xgmi reset works reach
3383 	 * those points. task_barrier_full will do both blocks.
3384 	 */
3385 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3386 
3387 		task_barrier_enter(&hive->tb);
3388 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3389 
3390 		if (adev->asic_reset_res)
3391 			goto fail;
3392 
3393 		task_barrier_exit(&hive->tb);
3394 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3395 
3396 		if (adev->asic_reset_res)
3397 			goto fail;
3398 
3399 		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3400 		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3401 			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3402 	} else {
3403 
3404 		task_barrier_full(&hive->tb);
		adev->asic_reset_res = amdgpu_asic_reset(adev);
3406 	}
3407 
3408 fail:
3409 	if (adev->asic_reset_res)
3410 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3411 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3412 	amdgpu_put_xgmi_hive(hive);
3413 }
3414 
3415 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3416 {
3417 	char *input = amdgpu_lockup_timeout;
3418 	char *timeout_setting = NULL;
3419 	int index = 0;
3420 	long timeout;
3421 	int ret = 0;
3422 
3423 	/*
3424 	 * By default timeout for non compute jobs is 10000
3425 	 * and 60000 for compute jobs.
3426 	 * In SR-IOV or passthrough mode, timeout for compute
3427 	 * jobs are 60000 by default.
3428 	 */
3429 	adev->gfx_timeout = msecs_to_jiffies(10000);
3430 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3431 	if (amdgpu_sriov_vf(adev))
3432 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3433 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3434 	else
3435 		adev->compute_timeout =  msecs_to_jiffies(60000);
3436 
3437 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3438 		while ((timeout_setting = strsep(&input, ",")) &&
3439 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3440 			ret = kstrtol(timeout_setting, 0, &timeout);
3441 			if (ret)
3442 				return ret;
3443 
3444 			if (timeout == 0) {
3445 				index++;
3446 				continue;
3447 			} else if (timeout < 0) {
3448 				timeout = MAX_SCHEDULE_TIMEOUT;
3449 				dev_warn(adev->dev, "lockup timeout disabled");
3450 				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3451 			} else {
3452 				timeout = msecs_to_jiffies(timeout);
3453 			}
3454 
3455 			switch (index++) {
3456 			case 0:
3457 				adev->gfx_timeout = timeout;
3458 				break;
3459 			case 1:
3460 				adev->compute_timeout = timeout;
3461 				break;
3462 			case 2:
3463 				adev->sdma_timeout = timeout;
3464 				break;
3465 			case 3:
3466 				adev->video_timeout = timeout;
3467 				break;
3468 			default:
3469 				break;
3470 			}
3471 		}
3472 		/*
3473 		 * There is only one value specified and
3474 		 * it should apply to all non-compute jobs.
3475 		 */
3476 		if (index == 1) {
3477 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3478 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3479 				adev->compute_timeout = adev->gfx_timeout;
3480 		}
3481 	}
3482 
3483 	return ret;
3484 }
3485 
3486 /**
3487  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3488  *
3489  * @adev: amdgpu_device pointer
3490  *
 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3492  */
3493 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3494 {
3495 	struct iommu_domain *domain;
3496 
3497 	domain = iommu_get_domain_for_dev(adev->dev);
3498 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3499 		adev->ram_is_direct_mapped = true;
3500 }
3501 
3502 static const struct attribute *amdgpu_dev_attributes[] = {
3503 	&dev_attr_product_name.attr,
3504 	&dev_attr_product_number.attr,
3505 	&dev_attr_serial_number.attr,
3506 	&dev_attr_pcie_replay_count.attr,
3507 	NULL
3508 };
3509 
3510 /**
3511  * amdgpu_device_init - initialize the driver
3512  *
3513  * @adev: amdgpu_device pointer
3514  * @flags: driver flags
3515  *
3516  * Initializes the driver info and hw (all asics).
3517  * Returns 0 for success or an error on failure.
3518  * Called at driver startup.
3519  */
3520 int amdgpu_device_init(struct amdgpu_device *adev,
3521 		       uint32_t flags)
3522 {
3523 	struct drm_device *ddev = adev_to_drm(adev);
3524 	struct pci_dev *pdev = adev->pdev;
3525 	int r, i;
3526 	bool px = false;
3527 	u32 max_MBps;
3528 
3529 	adev->shutdown = false;
3530 	adev->flags = flags;
3531 
3532 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3533 		adev->asic_type = amdgpu_force_asic_type;
3534 	else
3535 		adev->asic_type = flags & AMD_ASIC_MASK;
3536 
3537 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3538 	if (amdgpu_emu_mode == 1)
3539 		adev->usec_timeout *= 10;
3540 	adev->gmc.gart_size = 512 * 1024 * 1024;
3541 	adev->accel_working = false;
3542 	adev->num_rings = 0;
3543 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3544 	adev->mman.buffer_funcs = NULL;
3545 	adev->mman.buffer_funcs_ring = NULL;
3546 	adev->vm_manager.vm_pte_funcs = NULL;
3547 	adev->vm_manager.vm_pte_num_scheds = 0;
3548 	adev->gmc.gmc_funcs = NULL;
3549 	adev->harvest_ip_mask = 0x0;
3550 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3551 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3552 
3553 	adev->smc_rreg = &amdgpu_invalid_rreg;
3554 	adev->smc_wreg = &amdgpu_invalid_wreg;
3555 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3556 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3557 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3558 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3559 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3560 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3561 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3562 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3563 	adev->didt_rreg = &amdgpu_invalid_rreg;
3564 	adev->didt_wreg = &amdgpu_invalid_wreg;
3565 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3566 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3567 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3568 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3569 
3570 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3571 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3572 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3573 
	/* Mutex initialization is all done here so we
	 * can call these functions again without locking issues */
3576 	mutex_init(&adev->firmware.mutex);
3577 	mutex_init(&adev->pm.mutex);
3578 	mutex_init(&adev->gfx.gpu_clock_mutex);
3579 	mutex_init(&adev->srbm_mutex);
3580 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3581 	mutex_init(&adev->gfx.gfx_off_mutex);
3582 	mutex_init(&adev->grbm_idx_mutex);
3583 	mutex_init(&adev->mn_lock);
3584 	mutex_init(&adev->virt.vf_errors.lock);
3585 	hash_init(adev->mn_hash);
3586 	mutex_init(&adev->psp.mutex);
3587 	mutex_init(&adev->notifier_lock);
3588 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3589 	mutex_init(&adev->benchmark_mutex);
3590 
3591 	amdgpu_device_init_apu_flags(adev);
3592 
3593 	r = amdgpu_device_check_arguments(adev);
3594 	if (r)
3595 		return r;
3596 
3597 	spin_lock_init(&adev->mmio_idx_lock);
3598 	spin_lock_init(&adev->smc_idx_lock);
3599 	spin_lock_init(&adev->pcie_idx_lock);
3600 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3601 	spin_lock_init(&adev->didt_idx_lock);
3602 	spin_lock_init(&adev->gc_cac_idx_lock);
3603 	spin_lock_init(&adev->se_cac_idx_lock);
3604 	spin_lock_init(&adev->audio_endpt_idx_lock);
3605 	spin_lock_init(&adev->mm_stats.lock);
3606 
3607 	INIT_LIST_HEAD(&adev->shadow_list);
3608 	mutex_init(&adev->shadow_list_lock);
3609 
3610 	INIT_LIST_HEAD(&adev->reset_list);
3611 
3612 	INIT_LIST_HEAD(&adev->ras_list);
3613 
3614 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3615 			  amdgpu_device_delayed_init_work_handler);
3616 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3617 			  amdgpu_device_delay_enable_gfx_off);
3618 
3619 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3620 
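	/* GFXOFF starts disabled (request count 1) until it is allowed later */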
3621 	adev->gfx.gfx_off_req_count = 1;
3622 	adev->gfx.gfx_off_residency = 0;
3623 	adev->gfx.gfx_off_entrycount = 0;
3624 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3625 
3626 	atomic_set(&adev->throttling_logging_enabled, 1);
3627 	/*
3628 	 * If throttling continues, logging will be performed every minute
3629 	 * to avoid log flooding. "-1" is subtracted since the thermal
3630 	 * throttling interrupt comes every second. Thus, the total logging
3631 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3632 	 * for throttling interrupt) = 60 seconds.
3633 	 */
3634 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3635 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3636 
3637 	/* Registers mapping */
3638 	/* TODO: block userspace mapping of io register */
3639 	if (adev->asic_type >= CHIP_BONAIRE) {
3640 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3641 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3642 	} else {
3643 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3644 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3645 	}
3646 
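	/* Power state of every IP block is unknown until it is brought up */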
3647 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3648 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3649 
3650 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
	if (!adev->rmmio)
		return -ENOMEM;

	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
3656 
3657 	amdgpu_device_get_pcie_info(adev);
3658 
3659 	if (amdgpu_mcbp)
3660 		DRM_INFO("MCBP is enabled\n");
3661 
3662 	/*
3663 	 * Reset domain needs to be present early, before XGMI hive discovered
3664 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
3665 	 * early on during init and before calling to RREG32.
3666 	 */
3667 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3668 	if (!adev->reset_domain)
3669 		return -ENOMEM;
3670 
3671 	/* detect hw virtualization here */
3672 	amdgpu_detect_virtualization(adev);
3673 
3674 	r = amdgpu_device_get_job_timeout_settings(adev);
3675 	if (r) {
3676 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3677 		return r;
3678 	}
3679 
3680 	/* early init functions */
3681 	r = amdgpu_device_ip_early_init(adev);
3682 	if (r)
3683 		return r;
3684 
3685 	/* Enable TMZ based on IP_VERSION */
3686 	amdgpu_gmc_tmz_set(adev);
3687 
3688 	amdgpu_gmc_noretry_set(adev);
	/* Need to get xgmi info early to decide the reset behavior */
3690 	if (adev->gmc.xgmi.supported) {
3691 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
3692 		if (r)
3693 			return r;
3694 	}
3695 
3696 	/* enable PCIE atomic ops */
3697 	if (amdgpu_sriov_vf(adev))
3698 		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3699 			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3700 			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3701 	else
3702 		adev->have_atomics_support =
3703 			!pci_enable_atomic_ops_to_root(adev->pdev,
3704 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3705 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3706 	if (!adev->have_atomics_support)
		dev_info(adev->dev, "PCIe atomic ops are not supported\n");
3708 
	/* doorbell bar mapping and doorbell index init */
3710 	amdgpu_device_doorbell_init(adev);
3711 
3712 	if (amdgpu_emu_mode == 1) {
3713 		/* post the asic on emulation mode */
3714 		emu_soc_asic_init(adev);
3715 		goto fence_driver_init;
3716 	}
3717 
3718 	amdgpu_reset_init(adev);
3719 
3720 	/* detect if we are with an SRIOV vbios */
3721 	amdgpu_device_detect_sriov_bios(adev);
3722 
3723 	/* check if we need to reset the asic
3724 	 *  E.g., driver was not cleanly unloaded previously, etc.
3725 	 */
3726 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3727 		if (adev->gmc.xgmi.num_physical_nodes) {
3728 			dev_info(adev->dev, "Pending hive reset.\n");
3729 			adev->gmc.xgmi.pending_reset = true;
3730 			/* Only need to init necessary block for SMU to handle the reset */
3731 			for (i = 0; i < adev->num_ip_blocks; i++) {
3732 				if (!adev->ip_blocks[i].status.valid)
3733 					continue;
3734 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3735 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3736 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3737 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3738 					DRM_DEBUG("IP %s disabled for hw_init.\n",
3739 						adev->ip_blocks[i].version->funcs->name);
3740 					adev->ip_blocks[i].status.hw = true;
3741 				}
3742 			}
3743 		} else {
3744 			r = amdgpu_asic_reset(adev);
3745 			if (r) {
3746 				dev_err(adev->dev, "asic reset on init failed\n");
3747 				goto failed;
3748 			}
3749 		}
3750 	}
3751 
3752 	pci_enable_pcie_error_reporting(adev->pdev);
3753 
3754 	/* Post card if necessary */
3755 	if (amdgpu_device_need_post(adev)) {
3756 		if (!adev->bios) {
3757 			dev_err(adev->dev, "no vBIOS found\n");
3758 			r = -EINVAL;
3759 			goto failed;
3760 		}
3761 		DRM_INFO("GPU posting now...\n");
3762 		r = amdgpu_device_asic_init(adev);
3763 		if (r) {
3764 			dev_err(adev->dev, "gpu post error!\n");
3765 			goto failed;
3766 		}
3767 	}
3768 
3769 	if (adev->is_atom_fw) {
3770 		/* Initialize clocks */
3771 		r = amdgpu_atomfirmware_get_clock_info(adev);
3772 		if (r) {
3773 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3774 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3775 			goto failed;
3776 		}
3777 	} else {
3778 		/* Initialize clocks */
3779 		r = amdgpu_atombios_get_clock_info(adev);
3780 		if (r) {
3781 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3782 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3783 			goto failed;
3784 		}
3785 		/* init i2c buses */
3786 		if (!amdgpu_device_has_dc_support(adev))
3787 			amdgpu_atombios_i2c_init(adev);
3788 	}
3789 
3790 fence_driver_init:
3791 	/* Fence driver */
3792 	r = amdgpu_fence_driver_sw_init(adev);
3793 	if (r) {
3794 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3795 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3796 		goto failed;
3797 	}
3798 
3799 	/* init the mode config */
3800 	drm_mode_config_init(adev_to_drm(adev));
3801 
3802 	r = amdgpu_device_ip_init(adev);
3803 	if (r) {
3804 		/* failed in exclusive mode due to timeout */
3805 		if (amdgpu_sriov_vf(adev) &&
3806 		    !amdgpu_sriov_runtime(adev) &&
3807 		    amdgpu_virt_mmio_blocked(adev) &&
3808 		    !amdgpu_virt_wait_reset(adev)) {
3809 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3810 			/* Don't send request since VF is inactive. */
3811 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3812 			adev->virt.ops = NULL;
3813 			r = -EAGAIN;
3814 			goto release_ras_con;
3815 		}
3816 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3817 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3818 		goto release_ras_con;
3819 	}
3820 
3821 	amdgpu_fence_driver_hw_init(adev);
3822 
3823 	dev_info(adev->dev,
3824 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3825 			adev->gfx.config.max_shader_engines,
3826 			adev->gfx.config.max_sh_per_se,
3827 			adev->gfx.config.max_cu_per_sh,
3828 			adev->gfx.cu_info.number);
3829 
3830 	adev->accel_working = true;
3831 
3832 	amdgpu_vm_check_compute_bug(adev);
3833 
3834 	/* Initialize the buffer migration limit. */
3835 	if (amdgpu_moverate >= 0)
3836 		max_MBps = amdgpu_moverate;
3837 	else
3838 		max_MBps = 8; /* Allow 8 MB/s. */
3839 	/* Get a log2 for easy divisions. */
3840 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3841 
3842 	r = amdgpu_pm_sysfs_init(adev);
3843 	if (r) {
3844 		adev->pm_sysfs_en = false;
3845 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3846 	} else
3847 		adev->pm_sysfs_en = true;
3848 
3849 	r = amdgpu_ucode_sysfs_init(adev);
3850 	if (r) {
3851 		adev->ucode_sysfs_en = false;
3852 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3853 	} else
3854 		adev->ucode_sysfs_en = true;
3855 
3856 	r = amdgpu_psp_sysfs_init(adev);
3857 	if (r) {
3858 		adev->psp_sysfs_en = false;
3859 		if (!amdgpu_sriov_vf(adev))
3860 			DRM_ERROR("Creating psp sysfs failed\n");
3861 	} else
3862 		adev->psp_sysfs_en = true;
3863 
3864 	/*
3865 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3866 	 * Otherwise the mgpu fan boost feature will be skipped due to the
3867 	 * gpu instance is counted less.
3868 	 */
3869 	amdgpu_register_gpu_instance(adev);
3870 
3871 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3872 	 * explicit gating rather than handling it automatically.
3873 	 */
3874 	if (!adev->gmc.xgmi.pending_reset) {
3875 		r = amdgpu_device_ip_late_init(adev);
3876 		if (r) {
3877 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3878 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3879 			goto release_ras_con;
3880 		}
3881 		/* must succeed. */
3882 		amdgpu_ras_resume(adev);
3883 		queue_delayed_work(system_wq, &adev->delayed_init_work,
3884 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3885 	}
3886 
3887 	if (amdgpu_sriov_vf(adev))
3888 		flush_delayed_work(&adev->delayed_init_work);
3889 
3890 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3891 	if (r)
3892 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3893 
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3898 
	/* Keep the cached PCI config space at hand for restore after a
	 * sudden PCI error */
3900 	if (amdgpu_device_cache_pci_state(adev->pdev))
3901 		pci_restore_state(pdev);
3902 
	/* If we have > 1 VGA cards, then disable the amdgpu VGA resources.
	 * This will fail for cards that aren't VGA class devices, just
	 * ignore it.
	 */
3906 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3907 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3908 
3909 	if (amdgpu_device_supports_px(ddev)) {
3910 		px = true;
3911 		vga_switcheroo_register_client(adev->pdev,
3912 					       &amdgpu_switcheroo_ops, px);
3913 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3914 	}
3915 
3916 	if (adev->gmc.xgmi.pending_reset)
3917 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3918 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
3919 
3920 	amdgpu_device_check_iommu_direct_map(adev);
3921 
3922 	return 0;
3923 
3924 release_ras_con:
3925 	amdgpu_release_ras_context(adev);
3926 
3927 failed:
3928 	amdgpu_vf_error_trans_all(adev);
3929 
3930 	return r;
3931 }
3932 
3933 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3934 {
3935 
3936 	/* Clear all CPU mappings pointing to this device */
3937 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3938 
3939 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
3940 	amdgpu_device_doorbell_fini(adev);
3941 
3942 	iounmap(adev->rmmio);
3943 	adev->rmmio = NULL;
3944 	if (adev->mman.aper_base_kaddr)
3945 		iounmap(adev->mman.aper_base_kaddr);
3946 	adev->mman.aper_base_kaddr = NULL;
3947 
3948 	/* Memory manager related */
3949 	if (!adev->gmc.xgmi.connected_to_cpu) {
3950 		arch_phys_wc_del(adev->gmc.vram_mtrr);
3951 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3952 	}
3953 }
3954 
3955 /**
3956  * amdgpu_device_fini_hw - tear down the driver
3957  *
3958  * @adev: amdgpu_device pointer
3959  *
3960  * Tear down the driver info (all asics).
3961  * Called at driver shutdown.
3962  */
3963 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3964 {
3965 	dev_info(adev->dev, "amdgpu: finishing device.\n");
3966 	flush_delayed_work(&adev->delayed_init_work);
3967 	adev->shutdown = true;
3968 
	/* Make sure the IB tests have finished before entering exclusive mode
	 * to avoid preemption during the IB tests.
	 */
3972 	if (amdgpu_sriov_vf(adev)) {
3973 		amdgpu_virt_request_full_gpu(adev, false);
3974 		amdgpu_virt_fini_data_exchange(adev);
3975 	}
3976 
3977 	/* disable all interrupts */
3978 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
3980 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
3981 			drm_helper_force_disable_all(adev_to_drm(adev));
3982 		else
3983 			drm_atomic_helper_shutdown(adev_to_drm(adev));
3984 	}
3985 	amdgpu_fence_driver_hw_fini(adev);
3986 
3987 	if (adev->mman.initialized)
3988 		drain_workqueue(adev->mman.bdev.wq);
3989 
3990 	if (adev->pm_sysfs_en)
3991 		amdgpu_pm_sysfs_fini(adev);
3992 	if (adev->ucode_sysfs_en)
3993 		amdgpu_ucode_sysfs_fini(adev);
3994 	if (adev->psp_sysfs_en)
3995 		amdgpu_psp_sysfs_fini(adev);
3996 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3997 
	/* RAS features must be disabled before hw fini */
3999 	amdgpu_ras_pre_fini(adev);
4000 
4001 	amdgpu_device_ip_fini_early(adev);
4002 
4003 	amdgpu_irq_fini_hw(adev);
4004 
4005 	if (adev->mman.initialized)
4006 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4007 
4008 	amdgpu_gart_dummy_page_fini(adev);
4009 
	amdgpu_device_unmap_mmio(adev);
}
4013 
4014 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4015 {
4016 	int idx;
4017 
4018 	amdgpu_fence_driver_sw_fini(adev);
4019 	amdgpu_device_ip_fini(adev);
4020 	release_firmware(adev->firmware.gpu_info_fw);
4021 	adev->firmware.gpu_info_fw = NULL;
4022 	adev->accel_working = false;
4023 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4024 
4025 	amdgpu_reset_fini(adev);
4026 
4027 	/* free i2c buses */
4028 	if (!amdgpu_device_has_dc_support(adev))
4029 		amdgpu_i2c_fini(adev);
4030 
4031 	if (amdgpu_emu_mode != 1)
4032 		amdgpu_atombios_fini(adev);
4033 
4034 	kfree(adev->bios);
4035 	adev->bios = NULL;
4036 	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
4037 		vga_switcheroo_unregister_client(adev->pdev);
4038 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4039 	}
4040 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4041 		vga_client_unregister(adev->pdev);
4042 
4043 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4044 
4045 		iounmap(adev->rmmio);
4046 		adev->rmmio = NULL;
4047 		amdgpu_device_doorbell_fini(adev);
4048 		drm_dev_exit(idx);
4049 	}
4050 
4051 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4052 		amdgpu_pmu_fini(adev);
4053 	if (adev->mman.discovery_bin)
4054 		amdgpu_discovery_fini(adev);
4055 
4056 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4057 	adev->reset_domain = NULL;
4058 
	kfree(adev->pci_state);
}
4062 
4063 /**
4064  * amdgpu_device_evict_resources - evict device resources
4065  * @adev: amdgpu device object
4066  *
 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4068  * of the vram memory type. Mainly used for evicting device resources
4069  * at suspend time.
4070  *
4071  */
4072 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4073 {
4074 	int ret;
4075 
4076 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4077 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4078 		return 0;
4079 
4080 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4081 	if (ret)
4082 		DRM_WARN("evicting device resources failed\n");
4083 	return ret;
4084 }
4085 
4086 /*
4087  * Suspend & resume.
4088  */
4089 /**
4090  * amdgpu_device_suspend - initiate device suspend
4091  *
4092  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
4094  *
4095  * Puts the hw in the suspend state (all asics).
4096  * Returns 0 for success or an error on failure.
4097  * Called at driver suspend.
4098  */
4099 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4100 {
4101 	struct amdgpu_device *adev = drm_to_adev(dev);
4102 	int r = 0;
4103 
4104 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4105 		return 0;
4106 
4107 	adev->in_suspend = true;
4108 
4109 	if (amdgpu_sriov_vf(adev)) {
4110 		amdgpu_virt_fini_data_exchange(adev);
4111 		r = amdgpu_virt_request_full_gpu(adev, false);
4112 		if (r)
4113 			return r;
4114 	}
4115 
4116 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4117 		DRM_WARN("smart shift update failed\n");
4118 
4119 	drm_kms_helper_poll_disable(dev);
4120 
4121 	if (fbcon)
4122 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4123 
4124 	cancel_delayed_work_sync(&adev->delayed_init_work);
4125 
4126 	amdgpu_ras_suspend(adev);
4127 
4128 	amdgpu_device_ip_suspend_phase1(adev);
4129 
4130 	if (!adev->in_s0ix)
4131 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4132 
4133 	r = amdgpu_device_evict_resources(adev);
4134 	if (r)
4135 		return r;
4136 
4137 	amdgpu_fence_driver_hw_fini(adev);
4138 
4139 	amdgpu_device_ip_suspend_phase2(adev);
4140 
4141 	if (amdgpu_sriov_vf(adev))
4142 		amdgpu_virt_release_full_gpu(adev, false);
4143 
4144 	return 0;
4145 }
4146 
4147 /**
4148  * amdgpu_device_resume - initiate device resume
4149  *
4150  * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
4152  *
4153  * Bring the hw back to operating state (all asics).
4154  * Returns 0 for success or an error on failure.
4155  * Called at driver resume.
4156  */
4157 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4158 {
4159 	struct amdgpu_device *adev = drm_to_adev(dev);
4160 	int r = 0;
4161 
4162 	if (amdgpu_sriov_vf(adev)) {
4163 		r = amdgpu_virt_request_full_gpu(adev, true);
4164 		if (r)
4165 			return r;
4166 	}
4167 
4168 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4169 		return 0;
4170 
4171 	if (adev->in_s0ix)
4172 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4173 
4174 	/* post card */
4175 	if (amdgpu_device_need_post(adev)) {
4176 		r = amdgpu_device_asic_init(adev);
4177 		if (r)
4178 			dev_err(adev->dev, "amdgpu asic init failed\n");
4179 	}
4180 
4181 	r = amdgpu_device_ip_resume(adev);
4182 
4183 	/* no matter what r is, always need to properly release full GPU */
4184 	if (amdgpu_sriov_vf(adev)) {
4185 		amdgpu_virt_init_data_exchange(adev);
4186 		amdgpu_virt_release_full_gpu(adev, true);
4187 	}
4188 
4189 	if (r) {
4190 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4191 		return r;
4192 	}
4193 	amdgpu_fence_driver_hw_init(adev);
4194 
4195 	r = amdgpu_device_ip_late_init(adev);
4196 	if (r)
4197 		return r;
4198 
4199 	queue_delayed_work(system_wq, &adev->delayed_init_work,
4200 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4201 
4202 	if (!adev->in_s0ix) {
4203 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4204 		if (r)
4205 			return r;
4206 	}
4207 
4208 	/* Make sure IB tests flushed */
4209 	if (amdgpu_sriov_vf(adev))
4210 		amdgpu_irq_gpu_reset_resume_helper(adev);
4211 	flush_delayed_work(&adev->delayed_init_work);
4212 
4213 	if (adev->in_s0ix) {
		/* Re-enable gfxoff after IP resume; it was disabled for IP
		 * resume in amdgpu_device_ip_resume_phase2().
		 */
4217 		amdgpu_gfx_off_ctrl(adev, true);
4218 		DRM_DEBUG("will enable gfxoff for the mission mode\n");
4219 	}
4220 	if (fbcon)
4221 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4222 
4223 	drm_kms_helper_poll_enable(dev);
4224 
4225 	amdgpu_ras_resume(adev);
4226 
4227 	if (adev->mode_info.num_crtc) {
4228 		/*
4229 		 * Most of the connector probing functions try to acquire runtime pm
4230 		 * refs to ensure that the GPU is powered on when connector polling is
4231 		 * performed. Since we're calling this from a runtime PM callback,
4232 		 * trying to acquire rpm refs will cause us to deadlock.
4233 		 *
4234 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4235 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4236 		 */
4237 #ifdef CONFIG_PM
4238 		dev->dev->power.disable_depth++;
4239 #endif
4240 		if (!adev->dc_enabled)
4241 			drm_helper_hpd_irq_event(dev);
4242 		else
4243 			drm_kms_helper_hotplug_event(dev);
4244 #ifdef CONFIG_PM
4245 		dev->dev->power.disable_depth--;
4246 #endif
4247 	}
4248 	adev->in_suspend = false;
4249 
4250 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4251 		DRM_WARN("smart shift update failed\n");
4252 
4253 	return 0;
4254 }
4255 
4256 /**
 * amdgpu_device_ip_check_soft_reset - check if the asic is still hung
4258  *
4259  * @adev: amdgpu_device pointer
4260  *
4261  * The list of all the hardware IPs that make up the asic is walked and
4262  * the check_soft_reset callbacks are run.  check_soft_reset determines
4263  * if the asic is still hung or not.
4264  * Returns true if any of the IPs are still in a hung state, false if not.
4265  */
4266 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4267 {
4268 	int i;
4269 	bool asic_hang = false;
4270 
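	/* Always treat the ASIC as hung under SR-IOV so recovery is not skipped */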
4271 	if (amdgpu_sriov_vf(adev))
4272 		return true;
4273 
4274 	if (amdgpu_asic_need_full_reset(adev))
4275 		return true;
4276 
4277 	for (i = 0; i < adev->num_ip_blocks; i++) {
4278 		if (!adev->ip_blocks[i].status.valid)
4279 			continue;
4280 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4281 			adev->ip_blocks[i].status.hang =
4282 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4283 		if (adev->ip_blocks[i].status.hang) {
4284 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4285 			asic_hang = true;
4286 		}
4287 	}
4288 	return asic_hang;
4289 }
4290 
4291 /**
4292  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4293  *
4294  * @adev: amdgpu_device pointer
4295  *
4296  * The list of all the hardware IPs that make up the asic is walked and the
4297  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4298  * handles any IP specific hardware or software state changes that are
4299  * necessary for a soft reset to succeed.
4300  * Returns 0 on success, negative error code on failure.
4301  */
4302 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4303 {
4304 	int i, r = 0;
4305 
4306 	for (i = 0; i < adev->num_ip_blocks; i++) {
4307 		if (!adev->ip_blocks[i].status.valid)
4308 			continue;
4309 		if (adev->ip_blocks[i].status.hang &&
4310 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4311 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4312 			if (r)
4313 				return r;
4314 		}
4315 	}
4316 
4317 	return 0;
4318 }
4319 
4320 /**
4321  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4322  *
4323  * @adev: amdgpu_device pointer
4324  *
4325  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4326  * reset is necessary to recover.
4327  * Returns true if a full asic reset is required, false if not.
4328  */
4329 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4330 {
4331 	int i;
4332 
4333 	if (amdgpu_asic_need_full_reset(adev))
4334 		return true;
4335 
4336 	for (i = 0; i < adev->num_ip_blocks; i++) {
4337 		if (!adev->ip_blocks[i].status.valid)
4338 			continue;
4339 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4340 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4341 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4342 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4343 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4344 			if (adev->ip_blocks[i].status.hang) {
4345 				dev_info(adev->dev, "Some block need full reset!\n");
4346 				return true;
4347 			}
4348 		}
4349 	}
4350 	return false;
4351 }
4352 
4353 /**
4354  * amdgpu_device_ip_soft_reset - do a soft reset
4355  *
4356  * @adev: amdgpu_device pointer
4357  *
4358  * The list of all the hardware IPs that make up the asic is walked and the
4359  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4360  * IP specific hardware or software state changes that are necessary to soft
4361  * reset the IP.
4362  * Returns 0 on success, negative error code on failure.
4363  */
4364 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4365 {
4366 	int i, r = 0;
4367 
4368 	for (i = 0; i < adev->num_ip_blocks; i++) {
4369 		if (!adev->ip_blocks[i].status.valid)
4370 			continue;
4371 		if (adev->ip_blocks[i].status.hang &&
4372 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4373 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4374 			if (r)
4375 				return r;
4376 		}
4377 	}
4378 
4379 	return 0;
4380 }
4381 
4382 /**
4383  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4384  *
4385  * @adev: amdgpu_device pointer
4386  *
4387  * The list of all the hardware IPs that make up the asic is walked and the
4388  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4389  * handles any IP specific hardware or software state changes that are
4390  * necessary after the IP has been soft reset.
4391  * Returns 0 on success, negative error code on failure.
4392  */
4393 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4394 {
4395 	int i, r = 0;
4396 
4397 	for (i = 0; i < adev->num_ip_blocks; i++) {
4398 		if (!adev->ip_blocks[i].status.valid)
4399 			continue;
4400 		if (adev->ip_blocks[i].status.hang &&
4401 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4402 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4403 		if (r)
4404 			return r;
4405 	}
4406 
4407 	return 0;
4408 }
4409 
4410 /**
4411  * amdgpu_device_recover_vram - Recover some VRAM contents
4412  *
4413  * @adev: amdgpu_device pointer
4414  *
4415  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4416  * restore things like GPUVM page tables after a GPU reset where
4417  * the contents of VRAM might be lost.
4418  *
4419  * Returns:
4420  * 0 on success, negative error code on failure.
4421  */
4422 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4423 {
4424 	struct dma_fence *fence = NULL, *next = NULL;
4425 	struct amdgpu_bo *shadow;
4426 	struct amdgpu_bo_vm *vmbo;
4427 	long r = 1, tmo;
4428 
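	/* Allow a longer wait per shadow BO when running as an SR-IOV guest */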
4429 	if (amdgpu_sriov_runtime(adev))
4430 		tmo = msecs_to_jiffies(8000);
4431 	else
4432 		tmo = msecs_to_jiffies(100);
4433 
4434 	dev_info(adev->dev, "recover vram bo from shadow start\n");
4435 	mutex_lock(&adev->shadow_list_lock);
4436 	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4437 		shadow = &vmbo->bo;
4438 		/* No need to recover an evicted BO */
4439 		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4440 		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4441 		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4442 			continue;
4443 
4444 		r = amdgpu_bo_restore_shadow(shadow, &next);
4445 		if (r)
4446 			break;
4447 
4448 		if (fence) {
4449 			tmo = dma_fence_wait_timeout(fence, false, tmo);
4450 			dma_fence_put(fence);
4451 			fence = next;
4452 			if (tmo == 0) {
4453 				r = -ETIMEDOUT;
4454 				break;
4455 			} else if (tmo < 0) {
4456 				r = tmo;
4457 				break;
4458 			}
4459 		} else {
4460 			fence = next;
4461 		}
4462 	}
4463 	mutex_unlock(&adev->shadow_list_lock);
4464 
4465 	if (fence)
4466 		tmo = dma_fence_wait_timeout(fence, false, tmo);
4467 	dma_fence_put(fence);
4468 
4469 	if (r < 0 || tmo <= 0) {
4470 		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4471 		return -EIO;
4472 	}
4473 
4474 	dev_info(adev->dev, "recover vram bo from shadow done\n");
4475 	return 0;
4476 }
4477 
4478 
4479 /**
4480  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4481  *
4482  * @adev: amdgpu_device pointer
4483  * @from_hypervisor: request from hypervisor
4484  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
4487  */
4488 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4489 				     bool from_hypervisor)
4490 {
4491 	int r;
4492 	struct amdgpu_hive_info *hive = NULL;
4493 	int retry_limit = 0;
4494 
4495 retry:
4496 	amdgpu_amdkfd_pre_reset(adev);
4497 
4498 	if (from_hypervisor)
4499 		r = amdgpu_virt_request_full_gpu(adev, true);
4500 	else
4501 		r = amdgpu_virt_reset_gpu(adev);
4502 	if (r)
4503 		return r;
4504 
4505 	/* Resume IP prior to SMC */
4506 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4507 	if (r)
4508 		goto error;
4509 
4510 	amdgpu_virt_init_data_exchange(adev);
4511 
4512 	r = amdgpu_device_fw_loading(adev);
4513 	if (r)
4514 		return r;
4515 
4516 	/* now we are okay to resume SMC/CP/SDMA */
4517 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4518 	if (r)
4519 		goto error;
4520 
4521 	hive = amdgpu_get_xgmi_hive(adev);
4522 	/* Update PSP FW topology after reset */
4523 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4524 		r = amdgpu_xgmi_update_topology(hive, adev);
4525 
4526 	if (hive)
4527 		amdgpu_put_xgmi_hive(hive);
4528 
4529 	if (!r) {
4530 		amdgpu_irq_gpu_reset_resume_helper(adev);
4531 		r = amdgpu_ib_ring_tests(adev);
4532 
4533 		amdgpu_amdkfd_post_reset(adev);
4534 	}
4535 
4536 error:
4537 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4538 		amdgpu_inc_vram_lost(adev);
4539 		r = amdgpu_device_recover_vram(adev);
4540 	}
4541 	amdgpu_virt_release_full_gpu(adev, true);
4542 
4543 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4544 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4545 			retry_limit++;
4546 			goto retry;
4547 		} else
4548 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4549 	}
4550 
4551 	return r;
4552 }
4553 
4554 /**
 * amdgpu_device_has_job_running - check if there is any job in the pending list
 *
 * @adev: amdgpu_device pointer
 *
 * Check whether any ring still has a job in its pending list.
4560  */
4561 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4562 {
4563 	int i;
4564 	struct drm_sched_job *job;
4565 
4566 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4567 		struct amdgpu_ring *ring = adev->rings[i];
4568 
4569 		if (!ring || !ring->sched.thread)
4570 			continue;
4571 
4572 		spin_lock(&ring->sched.job_list_lock);
4573 		job = list_first_entry_or_null(&ring->sched.pending_list,
4574 					       struct drm_sched_job, list);
4575 		spin_unlock(&ring->sched.job_list_lock);
4576 		if (job)
4577 			return true;
4578 	}
4579 	return false;
4580 }
4581 
4582 /**
4583  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4584  *
4585  * @adev: amdgpu_device pointer
4586  *
4587  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4588  * a hung GPU.
4589  */
4590 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4591 {
4592 
4593 	if (amdgpu_gpu_recovery == 0)
4594 		goto disabled;
4595 
4596 	/* Skip soft reset check in fatal error mode */
4597 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4598 		return true;
4599 
4600 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
4602 		return false;
4603 	}
4604 
4605 	if (amdgpu_sriov_vf(adev))
4606 		return true;
4607 
4608 	if (amdgpu_gpu_recovery == -1) {
4609 		switch (adev->asic_type) {
4610 #ifdef CONFIG_DRM_AMDGPU_SI
4611 		case CHIP_VERDE:
4612 		case CHIP_TAHITI:
4613 		case CHIP_PITCAIRN:
4614 		case CHIP_OLAND:
4615 		case CHIP_HAINAN:
4616 #endif
4617 #ifdef CONFIG_DRM_AMDGPU_CIK
4618 		case CHIP_KAVERI:
4619 		case CHIP_KABINI:
4620 		case CHIP_MULLINS:
4621 #endif
4622 		case CHIP_CARRIZO:
4623 		case CHIP_STONEY:
4624 		case CHIP_CYAN_SKILLFISH:
4625 			goto disabled;
4626 		default:
4627 			break;
4628 		}
4629 	}
4630 
4631 	return true;
4632 
4633 disabled:
	dev_info(adev->dev, "GPU recovery disabled.\n");
	return false;
4636 }
4637 
4638 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4639 {
	u32 i;
	int ret = 0;

	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* disable BM */
	pci_clear_master(adev->pdev);

	amdgpu_device_cache_pci_state(adev->pdev);

	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		dev_err(adev->dev, "GPU mode1 reset failed\n");

	amdgpu_device_load_pci_state(adev->pdev);

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
	return ret;
4676 }
4677 
4678 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4679 				 struct amdgpu_reset_context *reset_context)
4680 {
4681 	int i, r = 0;
4682 	struct amdgpu_job *job = NULL;
4683 	bool need_full_reset =
4684 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4685 
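	/* Only the device that actually requested the reset carries the guilty job */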
4686 	if (reset_context->reset_req_dev == adev)
4687 		job = reset_context->job;
4688 
4689 	if (amdgpu_sriov_vf(adev)) {
4690 		/* stop the data exchange thread */
4691 		amdgpu_virt_fini_data_exchange(adev);
4692 	}
4693 
4694 	amdgpu_fence_driver_isr_toggle(adev, true);
4695 
4696 	/* block all schedulers and reset given job's ring */
4697 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4698 		struct amdgpu_ring *ring = adev->rings[i];
4699 
4700 		if (!ring || !ring->sched.thread)
4701 			continue;
4702 
		/* Clear job fences from the fence driver to avoid
		 * force_completion; only NULL and vm flush fences are
		 * left in the fence driver.
		 */
4705 		amdgpu_fence_driver_clear_job_fences(ring);
4706 
4707 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4708 		amdgpu_fence_driver_force_completion(ring);
4709 	}
4710 
4711 	amdgpu_fence_driver_isr_toggle(adev, false);
4712 
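	/* Increase the karma of the guilty job so its context can be marked guilty */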
4713 	if (job && job->vm)
4714 		drm_sched_increase_karma(&job->base);
4715 
4716 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4717 	/* If reset handler not implemented, continue; otherwise return */
4718 	if (r == -ENOSYS)
4719 		r = 0;
4720 	else
4721 		return r;
4722 
4723 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4724 	if (!amdgpu_sriov_vf(adev)) {
4725 
4726 		if (!need_full_reset)
4727 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4728 
4729 		if (!need_full_reset && amdgpu_gpu_recovery) {
4730 			amdgpu_device_ip_pre_soft_reset(adev);
4731 			r = amdgpu_device_ip_soft_reset(adev);
4732 			amdgpu_device_ip_post_soft_reset(adev);
4733 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4734 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4735 				need_full_reset = true;
4736 			}
4737 		}
4738 
4739 		if (need_full_reset)
4740 			r = amdgpu_device_ip_suspend(adev);
4741 		if (need_full_reset)
4742 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4743 		else
4744 			clear_bit(AMDGPU_NEED_FULL_RESET,
4745 				  &reset_context->flags);
4746 	}
4747 
4748 	return r;
4749 }
4750 
4751 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4752 {
4753 	int i;
4754 
4755 	lockdep_assert_held(&adev->reset_domain->sem);
4756 
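	/* Snapshot the configured reset dump registers for the devcoredump */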
4757 	for (i = 0; i < adev->num_regs; i++) {
4758 		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4759 		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4760 					     adev->reset_dump_reg_value[i]);
4761 	}
4762 
4763 	return 0;
4764 }
4765 
4766 #ifdef CONFIG_DEV_COREDUMP
4767 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4768 		size_t count, void *data, size_t datalen)
4769 {
4770 	struct drm_printer p;
4771 	struct amdgpu_device *adev = data;
4772 	struct drm_print_iterator iter;
4773 	int i;
4774 
4775 	iter.data = buffer;
4776 	iter.offset = 0;
4777 	iter.start = offset;
4778 	iter.remain = count;
4779 
4780 	p = drm_coredump_printer(&iter);
4781 
4782 	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4783 	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4784 	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4785 	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4786 	if (adev->reset_task_info.pid)
4787 		drm_printf(&p, "process_name: %s PID: %d\n",
4788 			   adev->reset_task_info.process_name,
4789 			   adev->reset_task_info.pid);
4790 
4791 	if (adev->reset_vram_lost)
4792 		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4793 	if (adev->num_regs) {
4794 		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
4795 
4796 		for (i = 0; i < adev->num_regs; i++)
4797 			drm_printf(&p, "0x%08x: 0x%08x\n",
4798 				   adev->reset_dump_reg_list[i],
4799 				   adev->reset_dump_reg_value[i]);
4800 	}
4801 
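	/* Report how many bytes were actually copied into the caller's buffer */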
4802 	return count - iter.remain;
4803 }
4804 
4805 static void amdgpu_devcoredump_free(void *data)
4806 {
4807 }
4808 
4809 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4810 {
4811 	struct drm_device *dev = adev_to_drm(adev);
4812 
4813 	ktime_get_ts64(&adev->reset_time);
4814 	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4815 		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4816 }
4817 #endif
4818 
4819 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4820 			 struct amdgpu_reset_context *reset_context)
4821 {
4822 	struct amdgpu_device *tmp_adev = NULL;
4823 	bool need_full_reset, skip_hw_reset, vram_lost = false;
4824 	int r = 0;
	bool gpu_reset_for_dev_remove = false;
4826 
4827 	/* Try reset handler method first */
4828 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4829 				    reset_list);
4830 	amdgpu_reset_reg_dumps(tmp_adev);
4831 
4832 	reset_context->reset_device_list = device_list_handle;
4833 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4834 	/* If reset handler not implemented, continue; otherwise return */
4835 	if (r == -ENOSYS)
4836 		r = 0;
4837 	else
4838 		return r;
4839 
4840 	/* Reset handler not implemented, use the default method */
4841 	need_full_reset =
4842 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4843 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4844 
4845 	gpu_reset_for_dev_remove =
4846 		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4847 			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4848 
4849 	/*
4850 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4851 	 * to allow proper links negotiation in FW (within 1 sec)
4852 	 */
4853 	if (!skip_hw_reset && need_full_reset) {
4854 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4855 			/* For XGMI run all resets in parallel to speed up the process */
4856 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4857 				tmp_adev->gmc.xgmi.pending_reset = false;
4858 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4859 					r = -EALREADY;
4860 			} else
4861 				r = amdgpu_asic_reset(tmp_adev);
4862 
4863 			if (r) {
4864 				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4865 					 r, adev_to_drm(tmp_adev)->unique);
4866 				break;
4867 			}
4868 		}
4869 
4870 		/* For XGMI wait for all resets to complete before proceed */
4871 		if (!r) {
4872 			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4873 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4874 					flush_work(&tmp_adev->xgmi_reset_work);
4875 					r = tmp_adev->asic_reset_res;
4876 					if (r)
4877 						break;
4878 				}
4879 			}
4880 		}
4881 	}
4882 
4883 	if (!r && amdgpu_ras_intr_triggered()) {
4884 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4885 			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4886 			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4887 				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4888 		}
4889 
4890 		amdgpu_ras_intr_cleared();
4891 	}
4892 
4893 	/* Since the mode1 reset affects base ip blocks, the
4894 	 * phase1 ip blocks need to be resumed. Otherwise there
4895 	 * will be a BIOS signature error and the psp bootloader
4896 	 * can't load kdb on the next amdgpu install.
4897 	 */
4898 	if (gpu_reset_for_dev_remove) {
4899 		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4900 			amdgpu_device_ip_resume_phase1(tmp_adev);
4901 
4902 		goto end;
4903 	}
4904 
4905 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4906 		if (need_full_reset) {
4907 			/* post card */
4908 			r = amdgpu_device_asic_init(tmp_adev);
4909 			if (r) {
4910 				dev_warn(tmp_adev->dev, "asic atom init failed!");
4911 			} else {
4912 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4913 				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4914 				if (r)
4915 					goto out;
4916 
4917 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4918 				if (r)
4919 					goto out;
4920 
4921 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4922 #ifdef CONFIG_DEV_COREDUMP
4923 				tmp_adev->reset_vram_lost = vram_lost;
4924 				memset(&tmp_adev->reset_task_info, 0,
4925 						sizeof(tmp_adev->reset_task_info));
4926 				if (reset_context->job && reset_context->job->vm)
4927 					tmp_adev->reset_task_info =
4928 						reset_context->job->vm->task_info;
4929 				amdgpu_reset_capture_coredumpm(tmp_adev);
4930 #endif
4931 				if (vram_lost) {
4932 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4933 					amdgpu_inc_vram_lost(tmp_adev);
4934 				}
4935 
4936 				r = amdgpu_device_fw_loading(tmp_adev);
4937 				if (r)
4938 					return r;
4939 
4940 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4941 				if (r)
4942 					goto out;
4943 
4944 				if (vram_lost)
4945 					amdgpu_device_fill_reset_magic(tmp_adev);
4946 
4947 				/*
4948 				 * Add this ASIC as tracked as reset was already
4949 				 * complete successfully.
4950 				 */
4951 				amdgpu_register_gpu_instance(tmp_adev);
4952 
4953 				if (!reset_context->hive &&
4954 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4955 					amdgpu_xgmi_add_device(tmp_adev);
4956 
4957 				r = amdgpu_device_ip_late_init(tmp_adev);
4958 				if (r)
4959 					goto out;
4960 
4961 				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4962 
4963 				/*
				 * The GPU enters a bad state once the number of
				 * faulty pages reported by ECC has reached the
				 * threshold, and RAS recovery is scheduled next.
				 * So add a check here to break recovery if the
				 * bad page threshold has indeed been exceeded,
				 * and remind the user to retire this GPU or set
				 * a bigger bad_page_threshold value when probing
				 * the driver again.
4972 				 */
4973 				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4974 					/* must succeed. */
4975 					amdgpu_ras_resume(tmp_adev);
4976 				} else {
4977 					r = -EINVAL;
4978 					goto out;
4979 				}
4980 
4981 				/* Update PSP FW topology after reset */
4982 				if (reset_context->hive &&
4983 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4984 					r = amdgpu_xgmi_update_topology(
4985 						reset_context->hive, tmp_adev);
4986 			}
4987 		}
4988 
4989 out:
4990 		if (!r) {
4991 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4992 			r = amdgpu_ib_ring_tests(tmp_adev);
4993 			if (r) {
4994 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4995 				need_full_reset = true;
4996 				r = -EAGAIN;
4997 				goto end;
4998 			}
4999 		}
5000 
5001 		if (!r)
5002 			r = amdgpu_device_recover_vram(tmp_adev);
5003 		else
5004 			tmp_adev->asic_reset_res = r;
5005 	}
5006 
5007 end:
5008 	if (need_full_reset)
5009 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5010 	else
5011 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5012 	return r;
5013 }
5014 
5015 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5016 {
5017 
5018 	switch (amdgpu_asic_reset_method(adev)) {
5019 	case AMD_RESET_METHOD_MODE1:
5020 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5021 		break;
5022 	case AMD_RESET_METHOD_MODE2:
5023 		adev->mp1_state = PP_MP1_STATE_RESET;
5024 		break;
5025 	default:
5026 		adev->mp1_state = PP_MP1_STATE_NONE;
5027 		break;
5028 	}
5029 }
5030 
5031 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5032 {
5033 	amdgpu_vf_error_trans_all(adev);
5034 	adev->mp1_state = PP_MP1_STATE_NONE;
5035 }
5036 
5037 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5038 {
5039 	struct pci_dev *p = NULL;
5040 
5041 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5042 			adev->pdev->bus->number, 1);
5043 	if (p) {
5044 		pm_runtime_enable(&(p->dev));
5045 		pm_runtime_resume(&(p->dev));
5046 	}
5047 }
5048 
5049 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5050 {
5051 	enum amd_reset_method reset_method;
5052 	struct pci_dev *p = NULL;
5053 	u64 expires;
5054 
5055 	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue without a proper suspend.
5058 	 */
5059 	reset_method = amdgpu_asic_reset_method(adev);
5060 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5061 	     (reset_method != AMD_RESET_METHOD_MODE1))
5062 		return -EINVAL;
5063 
5064 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5065 			adev->pdev->bus->number, 1);
5066 	if (!p)
5067 		return -ENODEV;
5068 
5069 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5070 	if (!expires)
5071 		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4s interval will be used. Since 3s is the
		 * audio controller's default autosuspend delay setting,
		 * 4s is guaranteed to cover it.
5076 		 */
5077 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5078 
5079 	while (!pm_runtime_status_suspended(&(p->dev))) {
5080 		if (!pm_runtime_suspend(&(p->dev)))
5081 			break;
5082 
5083 		if (expires < ktime_get_mono_fast_ns()) {
5084 			dev_warn(adev->dev, "failed to suspend display audio\n");
5085 			/* TODO: abort the succeeding gpu reset? */
5086 			return -ETIMEDOUT;
5087 		}
5088 	}
5089 
5090 	pm_runtime_disable(&(p->dev));
5091 
5092 	return 0;
5093 }
5094 
5095 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5096 {
5097 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5098 
5099 #if defined(CONFIG_DEBUG_FS)
5100 	if (!amdgpu_sriov_vf(adev))
5101 		cancel_work(&adev->reset_work);
5102 #endif
5103 
5104 	if (adev->kfd.dev)
5105 		cancel_work(&adev->kfd.reset_work);
5106 
5107 	if (amdgpu_sriov_vf(adev))
5108 		cancel_work(&adev->virt.flr_work);
5109 
5110 	if (con && adev->ras_enabled)
5111 		cancel_work(&con->recovery_work);
5112 
5113 }
5114 
5115 /**
5116  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5117  *
5118  * @adev: amdgpu_device pointer
 * @job: which job triggered the hang
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
5123  * Returns 0 for success or an error on failure.
5124  */
5125 
5126 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5127 			      struct amdgpu_job *job,
5128 			      struct amdgpu_reset_context *reset_context)
5129 {
5130 	struct list_head device_list, *device_list_handle =  NULL;
5131 	bool job_signaled = false;
5132 	struct amdgpu_hive_info *hive = NULL;
5133 	struct amdgpu_device *tmp_adev = NULL;
5134 	int i, r = 0;
5135 	bool need_emergency_restart = false;
5136 	bool audio_suspended = false;
5137 	bool gpu_reset_for_dev_remove = false;
5138 
5139 	gpu_reset_for_dev_remove =
5140 			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5141 				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5142 
5143 	/*
5144 	 * Special case: RAS triggered and full reset isn't supported
5145 	 */
5146 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5147 
5148 	/*
5149 	 * Flush RAM to disk so that after reboot
5150 	 * the user can read log and see why the system rebooted.
5151 	 */
5152 	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5153 		DRM_WARN("Emergency reboot.");
5154 
5155 		ksys_sync_helper();
5156 		emergency_restart();
5157 	}
5158 
5159 	dev_info(adev->dev, "GPU %s begin!\n",
		need_emergency_restart ? "jobs stop" : "reset");
5161 
5162 	if (!amdgpu_sriov_vf(adev))
5163 		hive = amdgpu_get_xgmi_hive(adev);
5164 	if (hive)
5165 		mutex_lock(&hive->hive_lock);
5166 
5167 	reset_context->job = job;
5168 	reset_context->hive = hive;
5169 	/*
5170 	 * Build list of devices to reset.
5171 	 * In case we are in XGMI hive mode, resort the device list
5172 	 * to put adev in the 1st position.
5173 	 */
5174 	INIT_LIST_HEAD(&device_list);
5175 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5176 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5177 			list_add_tail(&tmp_adev->reset_list, &device_list);
5178 			if (gpu_reset_for_dev_remove && adev->shutdown)
5179 				tmp_adev->shutdown = true;
5180 		}
5181 		if (!list_is_first(&adev->reset_list, &device_list))
5182 			list_rotate_to_front(&adev->reset_list, &device_list);
5183 		device_list_handle = &device_list;
5184 	} else {
5185 		list_add_tail(&adev->reset_list, &device_list);
5186 		device_list_handle = &device_list;
5187 	}
5188 
5189 	/* We need to lock reset domain only once both for XGMI and single device */
5190 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5191 				    reset_list);
5192 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5193 
5194 	/* block all schedulers and reset given job's ring */
5195 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5196 
5197 		amdgpu_device_set_mp1_state(tmp_adev);
5198 
5199 		/*
5200 		 * Try to put the audio codec into suspend state
5201 		 * before gpu reset started.
5202 		 *
		 * The power domain of the graphics device is shared
		 * with the AZ (audio) power domain. Without this, we
		 * may change the audio hardware behind the audio
		 * driver's back and trigger audio codec errors.
5208 		 */
5209 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5210 			audio_suspended = true;
5211 
5212 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5213 
5214 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5215 
5216 		if (!amdgpu_sriov_vf(tmp_adev))
5217 			amdgpu_amdkfd_pre_reset(tmp_adev);
5218 
5219 		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset has completed.
5222 		 */
5223 		amdgpu_unregister_gpu_instance(tmp_adev);
5224 
5225 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5226 
5227 		/* disable ras on ALL IPs */
5228 		if (!need_emergency_restart &&
5229 		      amdgpu_device_ip_need_full_reset(tmp_adev))
5230 			amdgpu_ras_suspend(tmp_adev);
5231 
5232 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5233 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5234 
5235 			if (!ring || !ring->sched.thread)
5236 				continue;
5237 
5238 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5239 
5240 			if (need_emergency_restart)
5241 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5242 		}
5243 		atomic_inc(&tmp_adev->gpu_reset_counter);
5244 	}
5245 
5246 	if (need_emergency_restart)
5247 		goto skip_sched_resume;
5248 
5249 	/*
5250 	 * Must check guilty signal here since after this point all old
5251 	 * HW fences are force signaled.
5252 	 *
5253 	 * job->base holds a reference to parent fence
5254 	 */
5255 	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5256 		job_signaled = true;
5257 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5258 		goto skip_hw_reset;
5259 	}
5260 
5261 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5262 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5263 		if (gpu_reset_for_dev_remove) {
			/* Workaround for ASICs that need to disable SMC first */
5265 			amdgpu_device_smu_fini_early(tmp_adev);
5266 		}
5267 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/* TODO: Should we stop? */
5269 		if (r) {
5270 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5271 				  r, adev_to_drm(tmp_adev)->unique);
5272 			tmp_adev->asic_reset_res = r;
5273 		}
5274 
5275 		/*
5276 		 * Drop all pending non scheduler resets. Scheduler resets
5277 		 * were already dropped during drm_sched_stop
5278 		 */
5279 		amdgpu_device_stop_pending_resets(tmp_adev);
5280 	}
5281 
5282 	/* Actual ASIC resets if needed.*/
5283 	/* Host driver will handle XGMI hive reset for SRIOV */
5284 	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, !job);
5286 		if (r)
5287 			adev->asic_reset_res = r;
5288 
		/* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
5290 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5291 			amdgpu_ras_resume(adev);
5292 	} else {
5293 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
		if (r == -EAGAIN)
5295 			goto retry;
5296 
5297 		if (!r && gpu_reset_for_dev_remove)
5298 			goto recover_end;
5299 	}
5300 
5301 skip_hw_reset:
5302 
5303 	/* Post ASIC reset for all devs .*/
5304 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5305 
5306 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5307 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5308 
5309 			if (!ring || !ring->sched.thread)
5310 				continue;
5311 
5312 			drm_sched_start(&ring->sched, true);
5313 		}
5314 
		if (tmp_adev->enable_mes &&
		    tmp_adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
			amdgpu_mes_self_test(tmp_adev);
5317 
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5321 
5322 		if (tmp_adev->asic_reset_res)
5323 			r = tmp_adev->asic_reset_res;
5324 
5325 		tmp_adev->asic_reset_res = 0;
5326 
5327 		if (r) {
			/* bad news, how do we tell it to userspace? */
5329 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5330 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5331 		} else {
5332 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5333 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5334 				DRM_WARN("smart shift update failed\n");
5335 		}
5336 	}
5337 
5338 skip_sched_resume:
5339 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5340 		/* unlock kfd: SRIOV would do it separately */
5341 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5342 			amdgpu_amdkfd_post_reset(tmp_adev);
5343 
		/* kfd_post_reset will do nothing if the kfd device is not initialized,
		 * so bring up kfd here if it wasn't initialized before
		 */
		if (!tmp_adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(tmp_adev);
5349 
5350 		if (audio_suspended)
5351 			amdgpu_device_resume_display_audio(tmp_adev);
5352 
5353 		amdgpu_device_unset_mp1_state(tmp_adev);
5354 
5355 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5356 	}
5357 
5358 recover_end:
5359 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5360 					    reset_list);
5361 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5362 
5363 	if (hive) {
5364 		mutex_unlock(&hive->hive_lock);
5365 		amdgpu_put_xgmi_hive(hive);
5366 	}
5367 
5368 	if (r)
5369 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5370 
5371 	atomic_set(&adev->reset_domain->reset_res, r);
5372 	return r;
5373 }
5374 
/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIe config space may not be available.
 */
5384 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5385 {
5386 	struct pci_dev *pdev;
5387 	enum pci_bus_speed speed_cap, platform_speed_cap;
5388 	enum pcie_link_width platform_link_width;
5389 
5390 	if (amdgpu_pcie_gen_cap)
5391 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5392 
5393 	if (amdgpu_pcie_lane_cap)
5394 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5395 
5396 	/* covers APUs as well */
5397 	if (pci_is_root_bus(adev->pdev->bus)) {
5398 		if (adev->pm.pcie_gen_mask == 0)
5399 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5400 		if (adev->pm.pcie_mlw_mask == 0)
5401 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5402 		return;
5403 	}
5404 
5405 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5406 		return;
5407 
5408 	pcie_bandwidth_available(adev->pdev, NULL,
5409 				 &platform_speed_cap, &platform_link_width);
5410 
5411 	if (adev->pm.pcie_gen_mask == 0) {
5412 		/* asic caps */
5413 		pdev = adev->pdev;
5414 		speed_cap = pcie_get_speed_cap(pdev);
5415 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5416 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5417 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5418 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5419 		} else {
5420 			if (speed_cap == PCIE_SPEED_32_0GT)
5421 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5422 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5423 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5424 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5425 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5426 			else if (speed_cap == PCIE_SPEED_16_0GT)
5427 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5428 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5429 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5430 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5431 			else if (speed_cap == PCIE_SPEED_8_0GT)
5432 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5433 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5434 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5435 			else if (speed_cap == PCIE_SPEED_5_0GT)
5436 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5437 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5438 			else
5439 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5440 		}
5441 		/* platform caps */
5442 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5443 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5444 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5445 		} else {
5446 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5447 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5448 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5449 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5450 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5451 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5452 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5453 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5454 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5455 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5456 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5457 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5458 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5459 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5460 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5461 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5462 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5463 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5464 			else
5465 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5466 
5467 		}
5468 	}
5469 	if (adev->pm.pcie_mlw_mask == 0) {
5470 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5471 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5472 		} else {
5473 			switch (platform_link_width) {
5474 			case PCIE_LNK_X32:
5475 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5476 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5477 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5478 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5479 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5480 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5481 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5482 				break;
5483 			case PCIE_LNK_X16:
5484 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5485 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5486 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5487 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5488 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5489 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5490 				break;
5491 			case PCIE_LNK_X12:
5492 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5493 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5494 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5495 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5496 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5497 				break;
5498 			case PCIE_LNK_X8:
5499 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5500 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5501 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5502 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5503 				break;
5504 			case PCIE_LNK_X4:
5505 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5506 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5507 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5508 				break;
5509 			case PCIE_LNK_X2:
5510 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5511 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5512 				break;
5513 			case PCIE_LNK_X1:
5514 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5515 				break;
5516 			default:
5517 				break;
5518 			}
5519 		}
5520 	}
5521 }
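
/*
 * Illustrative sketch, not used by the driver itself: the masks computed
 * above are plain bitfields, so power-management code can test individual
 * capabilities, for example:
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
 *		supports_gen4 = true;
 *
 * where the CAIL_PCIE_* bits describe the platform side of the link, the
 * CAIL_ASIC_PCIE_* bits describe the ASIC side, and "supports_gen4" is a
 * hypothetical local variable used only for this example.
 */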
5522 
5523 /**
5524  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5525  *
5526  * @adev: amdgpu_device pointer
5527  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5528  *
5529  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5530  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5531  * @peer_adev.
5532  */
5533 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5534 				      struct amdgpu_device *peer_adev)
5535 {
5536 #ifdef CONFIG_HSA_AMD_P2P
5537 	uint64_t address_mask = peer_adev->dev->dma_mask ?
5538 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5539 	resource_size_t aper_limit =
5540 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
5541 	bool p2p_access =
5542 		!adev->gmc.xgmi.connected_to_cpu &&
5543 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5544 
5545 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5546 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5547 		!(adev->gmc.aper_base & address_mask ||
5548 		  aper_limit & address_mask));
5549 #else
5550 	return false;
5551 #endif
5552 }
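
/*
 * Worked example (assumed numbers, for illustration only): a device with
 * 16 GB of VRAM is "large BAR" when its visible aperture also spans the
 * full 16 GB (real_vram_size == visible_vram_size). If the peer's DMA mask
 * is 44 bits wide, the aperture base and limit must additionally sit below
 * 1ULL << 44, and both pcie_p2p and the p2pdma distance check must allow
 * the route, for amdgpu_device_is_peer_accessible() to return true.
 */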
5553 
5554 int amdgpu_device_baco_enter(struct drm_device *dev)
5555 {
5556 	struct amdgpu_device *adev = drm_to_adev(dev);
5557 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5558 
5559 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5560 		return -ENOTSUPP;
5561 
5562 	if (ras && adev->ras_enabled &&
5563 	    adev->nbio.funcs->enable_doorbell_interrupt)
5564 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5565 
5566 	return amdgpu_dpm_baco_enter(adev);
5567 }
5568 
5569 int amdgpu_device_baco_exit(struct drm_device *dev)
5570 {
5571 	struct amdgpu_device *adev = drm_to_adev(dev);
5572 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5573 	int ret = 0;
5574 
5575 	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5576 		return -ENOTSUPP;
5577 
5578 	ret = amdgpu_dpm_baco_exit(adev);
5579 	if (ret)
5580 		return ret;
5581 
5582 	if (ras && adev->ras_enabled &&
5583 	    adev->nbio.funcs->enable_doorbell_interrupt)
5584 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5585 
5586 	if (amdgpu_passthrough(adev) &&
5587 	    adev->nbio.funcs->clear_doorbell_interrupt)
5588 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
5589 
5590 	return 0;
5591 }
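
/*
 * Note: BACO (Bus Active, Chip Off) enter/exit are used as a pair, typically
 * around runtime power-management transitions. The doorbell interrupt is
 * disabled on entry and re-enabled on exit so the two paths stay symmetric.
 */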
5592 
5593 /**
5594  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5595  * @pdev: PCI device struct
5596  * @state: PCI channel state
5597  *
5598  * Description: Called when a PCI error is detected.
5599  *
5600  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5601  */
5602 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5603 {
5604 	struct drm_device *dev = pci_get_drvdata(pdev);
5605 	struct amdgpu_device *adev = drm_to_adev(dev);
5606 	int i;
5607 
5608 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5609 
5610 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
5611 		DRM_WARN("No support for XGMI hive yet...");
5612 		return PCI_ERS_RESULT_DISCONNECT;
5613 	}
5614 
5615 	adev->pci_channel_state = state;
5616 
5617 	switch (state) {
5618 	case pci_channel_io_normal:
5619 		return PCI_ERS_RESULT_CAN_RECOVER;
5620 	/* Fatal error, prepare for slot reset */
5621 	case pci_channel_io_frozen:
5622 		/*
5623 		 * Locking adev->reset_domain->sem will prevent any external access
5624 		 * to GPU during PCI error recovery
5625 		 */
5626 		amdgpu_device_lock_reset_domain(adev->reset_domain);
5627 		amdgpu_device_set_mp1_state(adev);
5628 
5629 		/*
5630 		 * Block any work scheduling as we do for regular GPU reset
5631 		 * for the duration of the recovery
5632 		 */
5633 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5634 			struct amdgpu_ring *ring = adev->rings[i];
5635 
5636 			if (!ring || !ring->sched.thread)
5637 				continue;
5638 
5639 			drm_sched_stop(&ring->sched, NULL);
5640 		}
5641 		atomic_inc(&adev->gpu_reset_counter);
5642 		return PCI_ERS_RESULT_NEED_RESET;
5643 	case pci_channel_io_perm_failure:
5644 		/* Permanent error, prepare for device removal */
5645 		return PCI_ERS_RESULT_DISCONNECT;
5646 	}
5647 
5648 	return PCI_ERS_RESULT_NEED_RESET;
5649 }
5650 
5651 /**
5652  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5653  * @pdev: pointer to PCI device
5654  */
5655 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5656 {
5658 	DRM_INFO("PCI error: mmio enabled callback!!\n");
5659 
5660 	/* TODO - dump whatever for debugging purposes */
5661 
	/* This is called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Reads/writes to the device still
	 * work, so there is no need to reset the slot.
	 */
5666 
5667 	return PCI_ERS_RESULT_RECOVERED;
5668 }
5669 
5670 /**
5671  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5672  * @pdev: PCI device struct
5673  *
5674  * Description: This routine is called by the pci error recovery
5675  * code after the PCI slot has been reset, just before we
5676  * should resume normal operations.
5677  */
5678 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5679 {
5680 	struct drm_device *dev = pci_get_drvdata(pdev);
5681 	struct amdgpu_device *adev = drm_to_adev(dev);
5682 	int r, i;
5683 	struct amdgpu_reset_context reset_context;
5684 	u32 memsize;
5685 	struct list_head device_list;
5686 
5687 	DRM_INFO("PCI error: slot reset callback!!\n");
5688 
5689 	memset(&reset_context, 0, sizeof(reset_context));
5690 
5691 	INIT_LIST_HEAD(&device_list);
5692 	list_add_tail(&adev->reset_list, &device_list);
5693 
5694 	/* wait for asic to come out of reset */
5695 	msleep(500);
5696 
	/* Restore PCI config space */
5698 	amdgpu_device_load_pci_state(pdev);
5699 
	/* confirm ASIC came out of reset */
5701 	for (i = 0; i < adev->usec_timeout; i++) {
5702 		memsize = amdgpu_asic_get_config_memsize(adev);
5703 
5704 		if (memsize != 0xffffffff)
5705 			break;
5706 		udelay(1);
5707 	}
5708 	if (memsize == 0xffffffff) {
5709 		r = -ETIME;
5710 		goto out;
5711 	}
5712 
5713 	reset_context.method = AMD_RESET_METHOD_NONE;
5714 	reset_context.reset_req_dev = adev;
5715 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5716 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5717 
5718 	adev->no_hw_access = true;
5719 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5720 	adev->no_hw_access = false;
5721 	if (r)
5722 		goto out;
5723 
5724 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
5725 
5726 out:
5727 	if (!r) {
5728 		if (amdgpu_device_cache_pci_state(adev->pdev))
5729 			pci_restore_state(adev->pdev);
5730 
5731 		DRM_INFO("PCIe error recovery succeeded\n");
5732 	} else {
5733 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
5734 		amdgpu_device_unset_mp1_state(adev);
5735 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
5736 	}
5737 
5738 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5739 }
5740 
5741 /**
5742  * amdgpu_pci_resume() - resume normal ops after PCI reset
5743  * @pdev: pointer to PCI device
5744  *
 * Called when the error recovery driver tells us that it's
5746  * OK to resume normal operation.
5747  */
5748 void amdgpu_pci_resume(struct pci_dev *pdev)
5749 {
5750 	struct drm_device *dev = pci_get_drvdata(pdev);
5751 	struct amdgpu_device *adev = drm_to_adev(dev);
5752 	int i;
5753 
5755 	DRM_INFO("PCI error: resume callback!!\n");
5756 
5757 	/* Only continue execution for the case of pci_channel_io_frozen */
5758 	if (adev->pci_channel_state != pci_channel_io_frozen)
5759 		return;
5760 
5761 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5762 		struct amdgpu_ring *ring = adev->rings[i];
5763 
5764 		if (!ring || !ring->sched.thread)
5765 			continue;
5766 
5767 		drm_sched_start(&ring->sched, true);
5768 	}
5769 
5770 	amdgpu_device_unset_mp1_state(adev);
5771 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
5772 }
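
/*
 * The four callbacks above plug into the PCI core's error recovery machinery
 * through a struct pci_error_handlers supplied at driver registration time.
 * A minimal sketch (the structure lives outside this file; its name here is
 * an assumption):
 *
 *	static const struct pci_error_handlers amdgpu_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 */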
5773 
5774 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5775 {
5776 	struct drm_device *dev = pci_get_drvdata(pdev);
5777 	struct amdgpu_device *adev = drm_to_adev(dev);
5778 	int r;
5779 
5780 	r = pci_save_state(pdev);
5781 	if (!r) {
5782 		kfree(adev->pci_state);
5783 
5784 		adev->pci_state = pci_store_saved_state(pdev);
5785 
5786 		if (!adev->pci_state) {
5787 			DRM_ERROR("Failed to store PCI saved state");
5788 			return false;
5789 		}
5790 	} else {
5791 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
5792 		return false;
5793 	}
5794 
5795 	return true;
5796 }
5797 
5798 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5799 {
5800 	struct drm_device *dev = pci_get_drvdata(pdev);
5801 	struct amdgpu_device *adev = drm_to_adev(dev);
5802 	int r;
5803 
5804 	if (!adev->pci_state)
5805 		return false;
5806 
5807 	r = pci_load_saved_state(pdev, adev->pci_state);
5808 
5809 	if (!r) {
5810 		pci_restore_state(pdev);
5811 	} else {
5812 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
5813 		return false;
5814 	}
5815 
5816 	return true;
5817 }
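
/*
 * amdgpu_device_cache_pci_state() and amdgpu_device_load_pci_state() form a
 * save/restore pair: the config space is cached while the device is known
 * good and replayed during recovery, as done near the top of
 * amdgpu_pci_slot_reset() above.
 */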
5818 
5819 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5820 		struct amdgpu_ring *ring)
5821 {
5822 #ifdef CONFIG_X86_64
5823 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5824 		return;
5825 #endif
5826 	if (adev->gmc.xgmi.connected_to_cpu)
5827 		return;
5828 
5829 	if (ring && ring->funcs->emit_hdp_flush)
5830 		amdgpu_ring_emit_hdp_flush(ring);
5831 	else
5832 		amdgpu_asic_flush_hdp(adev, ring);
5833 }
5834 
5835 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5836 		struct amdgpu_ring *ring)
5837 {
5838 #ifdef CONFIG_X86_64
5839 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5840 		return;
5841 #endif
5842 	if (adev->gmc.xgmi.connected_to_cpu)
5843 		return;
5844 
5845 	amdgpu_asic_invalidate_hdp(adev, ring);
5846 }
5847 
5848 int amdgpu_in_reset(struct amdgpu_device *adev)
5849 {
5850 	return atomic_read(&adev->reset_domain->in_gpu_reset);
}
5852 
/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It helps to maintain error context when an error occurs.
 * Compared to a simple hang, the system will remain stable, at least for
 * SSH access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
 *    clears all CPU mappings to the device and disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 *    flush any in flight DMA operations
 */
5873 void amdgpu_device_halt(struct amdgpu_device *adev)
5874 {
5875 	struct pci_dev *pdev = adev->pdev;
5876 	struct drm_device *ddev = adev_to_drm(adev);
5877 
5878 	drm_dev_unplug(ddev);
5879 
5880 	amdgpu_irq_disable_all(adev);
5881 
5882 	amdgpu_fence_driver_hw_fini(adev);
5883 
5884 	adev->no_hw_access = true;
5885 
5886 	amdgpu_device_unmap_mmio(adev);
5887 
5888 	pci_disable_device(pdev);
5889 	pci_wait_for_pending_transaction(pdev);
5890 }
5891 
5892 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5893 				u32 reg)
5894 {
5895 	unsigned long flags, address, data;
5896 	u32 r;
5897 
5898 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5899 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5900 
5901 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5902 	WREG32(address, reg * 4);
5903 	(void)RREG32(address);
5904 	r = RREG32(data);
5905 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5906 	return r;
5907 }
5908 
5909 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5910 				u32 reg, u32 v)
5911 {
5912 	unsigned long flags, address, data;
5913 
5914 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5915 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5916 
5917 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5918 	WREG32(address, reg * 4);
5919 	(void)RREG32(address);
5920 	WREG32(data, v);
5921 	(void)RREG32(data);
5922 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5923 }
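
/*
 * The two helpers above go through an index/data register pair, which is why
 * each access takes pcie_idx_lock. A read-modify-write through them looks
 * like this (sketch; "reg" and "SOME_BIT" are placeholders, not real driver
 * symbols):
 *
 *	u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg);
 *
 *	tmp |= SOME_BIT;
 *	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
 */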
5924 
5925 /**
5926  * amdgpu_device_switch_gang - switch to a new gang
5927  * @adev: amdgpu_device pointer
5928  * @gang: the gang to switch to
5929  *
5930  * Try to switch to a new gang.
5931  * Returns: NULL if we switched to the new gang or a reference to the current
5932  * gang leader.
5933  */
5934 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5935 					    struct dma_fence *gang)
5936 {
5937 	struct dma_fence *old = NULL;
5938 
5939 	do {
5940 		dma_fence_put(old);
5941 		rcu_read_lock();
5942 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
5943 		rcu_read_unlock();
5944 
5945 		if (old == gang)
5946 			break;
5947 
5948 		if (!dma_fence_is_signaled(old))
5949 			return old;
5950 
5951 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5952 			 old, gang) != old);
5953 
5954 	dma_fence_put(old);
5955 	return NULL;
5956 }
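
/*
 * Typical caller pattern (a sketch, not code taken from the driver): retry
 * until the previous gang leader has signaled and the switch succeeds.
 *
 *	struct dma_fence *old;
 *
 *	while ((old = amdgpu_device_switch_gang(adev, gang))) {
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *	}
 */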
5957 
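/**
 * amdgpu_device_has_display_hardware - check whether the ASIC has a display block
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if the ASIC has (non-harvested) display hardware and false for
 * compute-only parts. For IP-discovery based ASICs this is decided from the
 * DCE HWIP version and the DMU harvest mask.
 */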
5958 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5959 {
5960 	switch (adev->asic_type) {
5961 #ifdef CONFIG_DRM_AMDGPU_SI
5962 	case CHIP_HAINAN:
5963 #endif
5964 	case CHIP_TOPAZ:
5965 		/* chips with no display hardware */
5966 		return false;
5967 #ifdef CONFIG_DRM_AMDGPU_SI
5968 	case CHIP_TAHITI:
5969 	case CHIP_PITCAIRN:
5970 	case CHIP_VERDE:
5971 	case CHIP_OLAND:
5972 #endif
5973 #ifdef CONFIG_DRM_AMDGPU_CIK
5974 	case CHIP_BONAIRE:
5975 	case CHIP_HAWAII:
5976 	case CHIP_KAVERI:
5977 	case CHIP_KABINI:
5978 	case CHIP_MULLINS:
5979 #endif
5980 	case CHIP_TONGA:
5981 	case CHIP_FIJI:
5982 	case CHIP_POLARIS10:
5983 	case CHIP_POLARIS11:
5984 	case CHIP_POLARIS12:
5985 	case CHIP_VEGAM:
5986 	case CHIP_CARRIZO:
5987 	case CHIP_STONEY:
5988 		/* chips with display hardware */
5989 		return true;
5990 	default:
5991 		/* IP discovery */
5992 		if (!adev->ip_versions[DCE_HWIP][0] ||
5993 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
5994 			return false;
5995 		return true;
5996 	}
5997 }
5998