1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 #include "amdgpu_fru_eeprom.h"
68 
69 #include <linux/suspend.h>
70 #include <drm/task_barrier.h>
71 #include <linux/pm_runtime.h>
72 
73 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
77 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
78 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
82 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
83 
84 #define AMDGPU_RESUME_MS		2000
85 
86 const char *amdgpu_asic_name[] = {
87 	"TAHITI",
88 	"PITCAIRN",
89 	"VERDE",
90 	"OLAND",
91 	"HAINAN",
92 	"BONAIRE",
93 	"KAVERI",
94 	"KABINI",
95 	"HAWAII",
96 	"MULLINS",
97 	"TOPAZ",
98 	"TONGA",
99 	"FIJI",
100 	"CARRIZO",
101 	"STONEY",
102 	"POLARIS10",
103 	"POLARIS11",
104 	"POLARIS12",
105 	"VEGAM",
106 	"VEGA10",
107 	"VEGA12",
108 	"VEGA20",
109 	"RAVEN",
110 	"ARCTURUS",
111 	"RENOIR",
112 	"NAVI10",
113 	"NAVI14",
114 	"NAVI12",
115 	"LAST",
116 };
117 
118 /**
119  * DOC: pcie_replay_count
120  *
121  * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
125  */
126 
127 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
128 		struct device_attribute *attr, char *buf)
129 {
130 	struct drm_device *ddev = dev_get_drvdata(dev);
131 	struct amdgpu_device *adev = ddev->dev_private;
132 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
133 
134 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
135 }
136 
137 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
138 		amdgpu_device_get_pcie_replay_count, NULL);
139 
140 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
141 
142 /**
143  * DOC: product_name
144  *
145  * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
148  * as returned from the FRU.
149  * NOTE: This is only available for certain server cards
150  */
151 
152 static ssize_t amdgpu_device_get_product_name(struct device *dev,
153 		struct device_attribute *attr, char *buf)
154 {
155 	struct drm_device *ddev = dev_get_drvdata(dev);
156 	struct amdgpu_device *adev = ddev->dev_private;
157 
158 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
159 }
160 
161 static DEVICE_ATTR(product_name, S_IRUGO,
162 		amdgpu_device_get_product_name, NULL);
163 
164 /**
165  * DOC: product_number
166  *
167  * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
170  * as returned from the FRU.
171  * NOTE: This is only available for certain server cards
172  */
173 
174 static ssize_t amdgpu_device_get_product_number(struct device *dev,
175 		struct device_attribute *attr, char *buf)
176 {
177 	struct drm_device *ddev = dev_get_drvdata(dev);
178 	struct amdgpu_device *adev = ddev->dev_private;
179 
180 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
181 }
182 
183 static DEVICE_ATTR(product_number, S_IRUGO,
184 		amdgpu_device_get_product_number, NULL);
185 
186 /**
187  * DOC: serial_number
188  *
189  * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
191  * The file serial_number is used for this and returns the serial number
192  * as returned from the FRU.
193  * NOTE: This is only available for certain server cards
194  */
195 
196 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
197 		struct device_attribute *attr, char *buf)
198 {
199 	struct drm_device *ddev = dev_get_drvdata(dev);
200 	struct amdgpu_device *adev = ddev->dev_private;
201 
202 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
203 }
204 
205 static DEVICE_ATTR(serial_number, S_IRUGO,
206 		amdgpu_device_get_serial_number, NULL);
207 
208 /**
209  * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
210  *
211  * @dev: drm_device pointer
212  *
213  * Returns true if the device is a dGPU with HG/PX power control,
214  * otherwise return false.
215  */
216 bool amdgpu_device_supports_boco(struct drm_device *dev)
217 {
218 	struct amdgpu_device *adev = dev->dev_private;
219 
220 	if (adev->flags & AMD_IS_PX)
221 		return true;
222 	return false;
223 }
224 
225 /**
226  * amdgpu_device_supports_baco - Does the device support BACO
227  *
228  * @dev: drm_device pointer
229  *
 * Returns true if the device supports BACO,
231  * otherwise return false.
232  */
233 bool amdgpu_device_supports_baco(struct drm_device *dev)
234 {
235 	struct amdgpu_device *adev = dev->dev_private;
236 
237 	return amdgpu_asic_supports_baco(adev);
238 }
239 
240 /**
241  * VRAM access helper functions.
242  *
243  * amdgpu_device_vram_access - read/write a buffer in vram
244  *
245  * @adev: amdgpu_device pointer
246  * @pos: offset of the buffer in vram
247  * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, the buffer at @buf must be at least @size bytes
249  * @write: true - write to vram, otherwise - read from vram
250  */
251 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
252 			       uint32_t *buf, size_t size, bool write)
253 {
254 	unsigned long flags;
255 	uint32_t hi = ~0;
256 	uint64_t last;
257 
258 
259 #ifdef CONFIG_64BIT
260 	last = min(pos + size, adev->gmc.visible_vram_size);
261 	if (last > pos) {
262 		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
263 		size_t count = last - pos;
264 
265 		if (write) {
266 			memcpy_toio(addr, buf, count);
267 			mb();
268 			amdgpu_asic_flush_hdp(adev, NULL);
269 		} else {
270 			amdgpu_asic_invalidate_hdp(adev, NULL);
271 			mb();
272 			memcpy_fromio(buf, addr, count);
273 		}
274 
275 		if (count == size)
276 			return;
277 
278 		pos += count;
279 		buf += count / 4;
280 		size -= count;
281 	}
282 #endif
283 
284 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
285 	for (last = pos + size; pos < last; pos += 4) {
286 		uint32_t tmp = pos >> 31;
287 
288 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
289 		if (tmp != hi) {
290 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
291 			hi = tmp;
292 		}
293 		if (write)
294 			WREG32_NO_KIQ(mmMM_DATA, *buf++);
295 		else
296 			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
297 	}
298 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
299 }
300 
301 /*
302  * device register access helper functions.
303  */
304 /**
305  * amdgpu_device_rreg - read a register
306  *
307  * @adev: amdgpu_device pointer
308  * @reg: dword aligned register offset
309  * @acc_flags: access flags which require special behavior
310  *
311  * Returns the 32 bit value from the offset specified.
312  */
313 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
314 			    uint32_t acc_flags)
315 {
316 	uint32_t ret;
317 
318 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
319 		return amdgpu_kiq_rreg(adev, reg);
320 
321 	if ((reg * 4) < adev->rmmio_size)
322 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
323 	else
324 		ret = adev->pcie_rreg(adev, (reg * 4));
325 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
326 	return ret;
327 }
328 
329 /*
 * MMIO register read with bytes helper function
 * @offset: byte offset from MMIO start
 *
 */
334 
335 /**
336  * amdgpu_mm_rreg8 - read a memory mapped IO register
337  *
338  * @adev: amdgpu_device pointer
339  * @offset: byte aligned register offset
340  *
341  * Returns the 8 bit value from the offset specified.
342  */
343 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
344 	if (offset < adev->rmmio_size)
345 		return (readb(adev->rmmio + offset));
346 	BUG();
347 }
348 
349 /*
 * MMIO register write with bytes helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 *
 */
355 /**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
357  *
358  * @adev: amdgpu_device pointer
359  * @offset: byte aligned register offset
360  * @value: 8 bit value to write
361  *
362  * Writes the value specified to the offset specified.
363  */
364 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
365 	if (offset < adev->rmmio_size)
366 		writeb(value, adev->rmmio + offset);
367 	else
368 		BUG();
369 }
370 
static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
372 					     uint32_t v, uint32_t acc_flags)
373 {
374 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
375 
376 	if ((reg * 4) < adev->rmmio_size)
377 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
378 	else
379 		adev->pcie_wreg(adev, (reg * 4), v);
380 }
381 
382 /**
383  * amdgpu_device_wreg - write to a register
384  *
385  * @adev: amdgpu_device pointer
386  * @reg: dword aligned register offset
387  * @v: 32 bit value to write to the register
388  * @acc_flags: access flags which require special behavior
389  *
390  * Writes the value specified to the offset specified.
391  */
392 void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
393 			uint32_t acc_flags)
394 {
395 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
396 		return amdgpu_kiq_wreg(adev, reg, v);
397 
398 	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
399 }
400 
401 /*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
406 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
407 		    uint32_t acc_flags)
408 {
409 	if (amdgpu_sriov_fullaccess(adev) &&
410 		adev->gfx.rlc.funcs &&
411 		adev->gfx.rlc.funcs->is_rlcg_access_range) {
412 
413 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
414 			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
415 	}
416 
417 	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
418 }
419 
420 /**
421  * amdgpu_io_rreg - read an IO register
422  *
423  * @adev: amdgpu_device pointer
424  * @reg: dword aligned register offset
425  *
426  * Returns the 32 bit value from the offset specified.
427  */
428 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
429 {
430 	if ((reg * 4) < adev->rio_mem_size)
431 		return ioread32(adev->rio_mem + (reg * 4));
432 	else {
433 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
434 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
435 	}
436 }
437 
438 /**
439  * amdgpu_io_wreg - write to an IO register
440  *
441  * @adev: amdgpu_device pointer
442  * @reg: dword aligned register offset
443  * @v: 32 bit value to write to the register
444  *
445  * Writes the value specified to the offset specified.
446  */
447 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
448 {
449 	if ((reg * 4) < adev->rio_mem_size)
450 		iowrite32(v, adev->rio_mem + (reg * 4));
451 	else {
452 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
453 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
454 	}
455 }
456 
457 /**
458  * amdgpu_mm_rdoorbell - read a doorbell dword
459  *
460  * @adev: amdgpu_device pointer
461  * @index: doorbell index
462  *
463  * Returns the value in the doorbell aperture at the
464  * requested doorbell index (CIK).
465  */
466 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
467 {
468 	if (index < adev->doorbell.num_doorbells) {
469 		return readl(adev->doorbell.ptr + index);
470 	} else {
471 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
472 		return 0;
473 	}
474 }
475 
476 /**
477  * amdgpu_mm_wdoorbell - write a doorbell dword
478  *
479  * @adev: amdgpu_device pointer
480  * @index: doorbell index
481  * @v: value to write
482  *
483  * Writes @v to the doorbell aperture at the
484  * requested doorbell index (CIK).
485  */
486 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
487 {
488 	if (index < adev->doorbell.num_doorbells) {
489 		writel(v, adev->doorbell.ptr + index);
490 	} else {
491 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
492 	}
493 }
494 
495 /**
496  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
497  *
498  * @adev: amdgpu_device pointer
499  * @index: doorbell index
500  *
501  * Returns the value in the doorbell aperture at the
502  * requested doorbell index (VEGA10+).
503  */
504 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
505 {
506 	if (index < adev->doorbell.num_doorbells) {
507 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
508 	} else {
509 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
510 		return 0;
511 	}
512 }
513 
514 /**
515  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
516  *
517  * @adev: amdgpu_device pointer
518  * @index: doorbell index
519  * @v: value to write
520  *
521  * Writes @v to the doorbell aperture at the
522  * requested doorbell index (VEGA10+).
523  */
524 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
525 {
526 	if (index < adev->doorbell.num_doorbells) {
527 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
528 	} else {
529 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
530 	}
531 }
532 
533 /**
534  * amdgpu_invalid_rreg - dummy reg read function
535  *
536  * @adev: amdgpu device pointer
537  * @reg: offset of register
538  *
539  * Dummy register read function.  Used for register blocks
540  * that certain asics don't have (all asics).
541  * Returns the value in the register.
542  */
543 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
544 {
545 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
546 	BUG();
547 	return 0;
548 }
549 
550 /**
551  * amdgpu_invalid_wreg - dummy reg write function
552  *
553  * @adev: amdgpu device pointer
554  * @reg: offset of register
555  * @v: value to write to the register
556  *
 * Dummy register write function.  Used for register blocks
558  * that certain asics don't have (all asics).
559  */
560 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
561 {
562 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
563 		  reg, v);
564 	BUG();
565 }
566 
567 /**
568  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
569  *
570  * @adev: amdgpu device pointer
571  * @reg: offset of register
572  *
573  * Dummy register read function.  Used for register blocks
574  * that certain asics don't have (all asics).
575  * Returns the value in the register.
576  */
577 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
578 {
579 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
580 	BUG();
581 	return 0;
582 }
583 
584 /**
585  * amdgpu_invalid_wreg64 - dummy reg write function
586  *
587  * @adev: amdgpu device pointer
588  * @reg: offset of register
589  * @v: value to write to the register
590  *
 * Dummy register write function.  Used for register blocks
592  * that certain asics don't have (all asics).
593  */
594 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
595 {
596 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
597 		  reg, v);
598 	BUG();
599 }
600 
601 /**
602  * amdgpu_block_invalid_rreg - dummy reg read function
603  *
604  * @adev: amdgpu device pointer
605  * @block: offset of instance
606  * @reg: offset of register
607  *
608  * Dummy register read function.  Used for register blocks
609  * that certain asics don't have (all asics).
610  * Returns the value in the register.
611  */
612 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
613 					  uint32_t block, uint32_t reg)
614 {
615 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
616 		  reg, block);
617 	BUG();
618 	return 0;
619 }
620 
621 /**
622  * amdgpu_block_invalid_wreg - dummy reg write function
623  *
624  * @adev: amdgpu device pointer
625  * @block: offset of instance
626  * @reg: offset of register
627  * @v: value to write to the register
628  *
 * Dummy register write function.  Used for register blocks
630  * that certain asics don't have (all asics).
631  */
632 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
633 				      uint32_t block,
634 				      uint32_t reg, uint32_t v)
635 {
636 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
637 		  reg, block, v);
638 	BUG();
639 }
640 
641 /**
642  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
643  *
644  * @adev: amdgpu device pointer
645  *
646  * Allocates a scratch page of VRAM for use by various things in the
647  * driver.
648  */
649 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
650 {
651 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
652 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
653 				       &adev->vram_scratch.robj,
654 				       &adev->vram_scratch.gpu_addr,
655 				       (void **)&adev->vram_scratch.ptr);
656 }
657 
658 /**
659  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
660  *
661  * @adev: amdgpu device pointer
662  *
663  * Frees the VRAM scratch page.
664  */
665 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
666 {
667 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
668 }
669 
670 /**
671  * amdgpu_device_program_register_sequence - program an array of registers.
672  *
673  * @adev: amdgpu_device pointer
674  * @registers: pointer to the register array
675  * @array_size: size of the register array
676  *
 * Programs an array of registers with AND and OR masks.
678  * This is a helper for setting golden registers.
679  */
680 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
681 					     const u32 *registers,
682 					     const u32 array_size)
683 {
684 	u32 tmp, reg, and_mask, or_mask;
685 	int i;
686 
687 	if (array_size % 3)
688 		return;
689 
	for (i = 0; i < array_size; i += 3) {
691 		reg = registers[i + 0];
692 		and_mask = registers[i + 1];
693 		or_mask = registers[i + 2];
694 
695 		if (and_mask == 0xffffffff) {
696 			tmp = or_mask;
697 		} else {
698 			tmp = RREG32(reg);
699 			tmp &= ~and_mask;
700 			if (adev->family >= AMDGPU_FAMILY_AI)
701 				tmp |= (or_mask & and_mask);
702 			else
703 				tmp |= or_mask;
704 		}
705 		WREG32(reg, tmp);
706 	}
707 }
708 
709 /**
710  * amdgpu_device_pci_config_reset - reset the GPU
711  *
712  * @adev: amdgpu_device pointer
713  *
714  * Resets the GPU using the pci config reset sequence.
715  * Only applicable to asics prior to vega10.
716  */
717 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
718 {
719 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
720 }
721 
722 /*
723  * GPU doorbell aperture helpers function.
724  */
725 /**
726  * amdgpu_device_doorbell_init - Init doorbell driver information.
727  *
728  * @adev: amdgpu_device pointer
729  *
730  * Init doorbell driver information (CIK)
731  * Returns 0 on success, error on failure.
732  */
733 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
734 {
735 
736 	/* No doorbell on SI hardware generation */
737 	if (adev->asic_type < CHIP_BONAIRE) {
738 		adev->doorbell.base = 0;
739 		adev->doorbell.size = 0;
740 		adev->doorbell.num_doorbells = 0;
741 		adev->doorbell.ptr = NULL;
742 		return 0;
743 	}
744 
745 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
746 		return -EINVAL;
747 
748 	amdgpu_asic_init_doorbell_index(adev);
749 
750 	/* doorbell bar mapping */
751 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
752 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
753 
754 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
755 					     adev->doorbell_index.max_assignment+1);
756 	if (adev->doorbell.num_doorbells == 0)
757 		return -EINVAL;
758 
	/* For Vega, reserve and map two pages on the doorbell BAR since the
	 * SDMA paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should grow by one page (0x400 in dwords).
	 */
765 	if (adev->asic_type >= CHIP_VEGA10)
766 		adev->doorbell.num_doorbells += 0x400;
767 
768 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
769 				     adev->doorbell.num_doorbells *
770 				     sizeof(u32));
771 	if (adev->doorbell.ptr == NULL)
772 		return -ENOMEM;
773 
774 	return 0;
775 }
776 
777 /**
778  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
779  *
780  * @adev: amdgpu_device pointer
781  *
782  * Tear down doorbell driver information (CIK)
783  */
784 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
785 {
786 	iounmap(adev->doorbell.ptr);
787 	adev->doorbell.ptr = NULL;
788 }
789 
790 
791 
792 /*
793  * amdgpu_device_wb_*()
794  * Writeback is the method by which the GPU updates special pages in memory
795  * with the status of certain GPU events (fences, ring pointers,etc.).
796  */
797 
798 /**
799  * amdgpu_device_wb_fini - Disable Writeback and free memory
800  *
801  * @adev: amdgpu_device pointer
802  *
803  * Disables Writeback and frees the Writeback memory (all asics).
804  * Used at driver shutdown.
805  */
806 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
807 {
808 	if (adev->wb.wb_obj) {
809 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
810 				      &adev->wb.gpu_addr,
811 				      (void **)&adev->wb.wb);
812 		adev->wb.wb_obj = NULL;
813 	}
814 }
815 
816 /**
817  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
818  *
819  * @adev: amdgpu_device pointer
820  *
821  * Initializes writeback and allocates writeback memory (all asics).
822  * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
824  */
825 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
826 {
827 	int r;
828 
829 	if (adev->wb.wb_obj == NULL) {
830 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
831 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
832 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
833 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
834 					    (void **)&adev->wb.wb);
835 		if (r) {
836 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
837 			return r;
838 		}
839 
840 		adev->wb.num_wb = AMDGPU_MAX_WB;
841 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
842 
843 		/* clear wb memory */
844 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
845 	}
846 
847 	return 0;
848 }
849 
850 /**
851  * amdgpu_device_wb_get - Allocate a wb entry
852  *
853  * @adev: amdgpu_device pointer
854  * @wb: wb index
855  *
856  * Allocate a wb slot for use by the driver (all asics).
857  * Returns 0 on success or -EINVAL on failure.
858  */
859 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
860 {
861 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
862 
863 	if (offset < adev->wb.num_wb) {
864 		__set_bit(offset, adev->wb.used);
865 		*wb = offset << 3; /* convert to dw offset */
866 		return 0;
867 	} else {
868 		return -EINVAL;
869 	}
870 }
871 
872 /**
873  * amdgpu_device_wb_free - Free a wb entry
874  *
875  * @adev: amdgpu_device pointer
876  * @wb: wb index
877  *
878  * Free a wb slot allocated for use by the driver (all asics)
879  */
880 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
881 {
882 	wb >>= 3;
883 	if (wb < adev->wb.num_wb)
884 		__clear_bit(wb, adev->wb.used);
885 }
886 
887 /**
888  * amdgpu_device_resize_fb_bar - try to resize FB BAR
889  *
890  * @adev: amdgpu_device pointer
891  *
892  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
894  * driver loading by returning -ENODEV.
895  */
896 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
897 {
898 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
899 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
900 	struct pci_bus *root;
901 	struct resource *res;
902 	unsigned i;
903 	u16 cmd;
904 	int r;
905 
906 	/* Bypass for VF */
907 	if (amdgpu_sriov_vf(adev))
908 		return 0;
909 
910 	/* Check if the root BUS has 64bit memory resources */
911 	root = adev->pdev->bus;
912 	while (root->parent)
913 		root = root->parent;
914 
915 	pci_bus_for_each_resource(root, res, i) {
916 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
917 		    res->start > 0x100000000ull)
918 			break;
919 	}
920 
921 	/* Trying to resize is pointless without a root hub window above 4GB */
922 	if (!res)
923 		return 0;
924 
925 	/* Disable memory decoding while we change the BAR addresses and size */
926 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
927 	pci_write_config_word(adev->pdev, PCI_COMMAND,
928 			      cmd & ~PCI_COMMAND_MEMORY);
929 
930 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
931 	amdgpu_device_doorbell_fini(adev);
932 	if (adev->asic_type >= CHIP_BONAIRE)
933 		pci_release_resource(adev->pdev, 2);
934 
935 	pci_release_resource(adev->pdev, 0);
936 
937 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
938 	if (r == -ENOSPC)
939 		DRM_INFO("Not enough PCI address space for a large BAR.");
940 	else if (r && r != -ENOTSUPP)
941 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
942 
943 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
944 
945 	/* When the doorbell or fb BAR isn't available we have no chance of
946 	 * using the device.
947 	 */
948 	r = amdgpu_device_doorbell_init(adev);
949 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
950 		return -ENODEV;
951 
952 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
953 
954 	return 0;
955 }
956 
957 /*
958  * GPU helpers function.
959  */
960 /**
 * amdgpu_device_need_post - check if the hw needs to be posted or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if a post is needed because a hw reset was performed.
 * Returns true if a post is needed, false if not.
968  */
969 bool amdgpu_device_need_post(struct amdgpu_device *adev)
970 {
971 	uint32_t reg;
972 
973 	if (amdgpu_sriov_vf(adev))
974 		return false;
975 
976 	if (amdgpu_passthrough(adev)) {
		/* For FIJI: In the whole GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so force the vPost for SMC versions below 22.15.
		 */
982 		if (adev->asic_type == CHIP_FIJI) {
983 			int err;
984 			uint32_t fw_ver;
985 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
987 			if (err)
988 				return true;
989 
990 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
991 			if (fw_ver < 0x00160e00)
992 				return true;
993 		}
994 	}
995 
996 	if (adev->has_hw_reset) {
997 		adev->has_hw_reset = false;
998 		return true;
999 	}
1000 
1001 	/* bios scratch used on CIK+ */
1002 	if (adev->asic_type >= CHIP_BONAIRE)
1003 		return amdgpu_atombios_scratch_need_asic_init(adev);
1004 
1005 	/* check MEM_SIZE for older asics */
1006 	reg = amdgpu_asic_get_config_memsize(adev);
1007 
1008 	if ((reg != 0) && (reg != 0xffffffff))
1009 		return false;
1010 
1011 	return true;
1012 }
1013 
1014 /* if we get transitioned to only one device, take VGA back */
1015 /**
1016  * amdgpu_device_vga_set_decode - enable/disable vga decode
1017  *
1018  * @cookie: amdgpu_device pointer
1019  * @state: enable/disable vga decode
1020  *
1021  * Enable/disable vga decode (all asics).
1022  * Returns VGA resource flags.
1023  */
1024 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1025 {
1026 	struct amdgpu_device *adev = cookie;
1027 	amdgpu_asic_set_vga_state(adev, state);
1028 	if (state)
1029 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1030 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1031 	else
1032 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1033 }
1034 
1035 /**
1036  * amdgpu_device_check_block_size - validate the vm block size
1037  *
1038  * @adev: amdgpu_device pointer
1039  *
1040  * Validates the vm block size specified via module parameter.
1041  * The vm block size defines number of bits in page table versus page directory,
1042  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1043  * page table and the remaining bits are in the page directory.
1044  */
1045 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1046 {
1047 	/* defines number of bits in page table versus page directory,
1048 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1049 	 * page table and the remaining bits are in the page directory */
1050 	if (amdgpu_vm_block_size == -1)
1051 		return;
1052 
1053 	if (amdgpu_vm_block_size < 9) {
1054 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1055 			 amdgpu_vm_block_size);
1056 		amdgpu_vm_block_size = -1;
1057 	}
1058 }
1059 
1060 /**
1061  * amdgpu_device_check_vm_size - validate the vm size
1062  *
1063  * @adev: amdgpu_device pointer
1064  *
1065  * Validates the vm size in GB specified via module parameter.
1066  * The VM size is the size of the GPU virtual memory space in GB.
1067  */
1068 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1069 {
1070 	/* no need to check the default value */
1071 	if (amdgpu_vm_size == -1)
1072 		return;
1073 
1074 	if (amdgpu_vm_size < 1) {
1075 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1076 			 amdgpu_vm_size);
1077 		amdgpu_vm_size = -1;
1078 	}
1079 }
1080 
1081 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1082 {
1083 	struct sysinfo si;
1084 	bool is_os_64 = (sizeof(void *) == 8);
1085 	uint64_t total_memory;
1086 	uint64_t dram_size_seven_GB = 0x1B8000000;
1087 	uint64_t dram_size_three_GB = 0xB8000000;
1088 
1089 	if (amdgpu_smu_memory_pool_size == 0)
1090 		return;
1091 
1092 	if (!is_os_64) {
1093 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1094 		goto def_value;
1095 	}
1096 	si_meminfo(&si);
1097 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1098 
1099 	if ((amdgpu_smu_memory_pool_size == 1) ||
1100 		(amdgpu_smu_memory_pool_size == 2)) {
1101 		if (total_memory < dram_size_three_GB)
1102 			goto def_value1;
1103 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1104 		(amdgpu_smu_memory_pool_size == 8)) {
1105 		if (total_memory < dram_size_seven_GB)
1106 			goto def_value1;
1107 	} else {
1108 		DRM_WARN("Smu memory pool size not supported\n");
1109 		goto def_value;
1110 	}
1111 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1112 
1113 	return;
1114 
1115 def_value1:
1116 	DRM_WARN("No enough system memory\n");
1117 def_value:
1118 	adev->pm.smu_prv_buffer_size = 0;
1119 }
1120 
1121 /**
1122  * amdgpu_device_check_arguments - validate module params
1123  *
1124  * @adev: amdgpu_device pointer
1125  *
1126  * Validates certain module parameters and updates
1127  * the associated values used by the driver (all asics).
1128  */
1129 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1130 {
1131 	if (amdgpu_sched_jobs < 4) {
1132 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1133 			 amdgpu_sched_jobs);
1134 		amdgpu_sched_jobs = 4;
1135 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1136 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1137 			 amdgpu_sched_jobs);
1138 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1139 	}
1140 
1141 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1142 		/* gart size must be greater or equal to 32M */
1143 		dev_warn(adev->dev, "gart size (%d) too small\n",
1144 			 amdgpu_gart_size);
1145 		amdgpu_gart_size = -1;
1146 	}
1147 
1148 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1149 		/* gtt size must be greater or equal to 32M */
1150 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1151 				 amdgpu_gtt_size);
1152 		amdgpu_gtt_size = -1;
1153 	}
1154 
1155 	/* valid range is between 4 and 9 inclusive */
1156 	if (amdgpu_vm_fragment_size != -1 &&
1157 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1158 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1159 		amdgpu_vm_fragment_size = -1;
1160 	}
1161 
1162 	amdgpu_device_check_smu_prv_buffer_size(adev);
1163 
1164 	amdgpu_device_check_vm_size(adev);
1165 
1166 	amdgpu_device_check_block_size(adev);
1167 
1168 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1169 
1170 	amdgpu_gmc_tmz_set(adev);
1171 
1172 	return 0;
1173 }
1174 
1175 /**
1176  * amdgpu_switcheroo_set_state - set switcheroo state
1177  *
1178  * @pdev: pci dev pointer
1179  * @state: vga_switcheroo state
1180  *
 * Callback for the switcheroo driver.  Suspends or resumes the
 * asic before or after it is powered up using ACPI methods.
1183  */
1184 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1185 {
1186 	struct drm_device *dev = pci_get_drvdata(pdev);
1187 	int r;
1188 
1189 	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1190 		return;
1191 
1192 	if (state == VGA_SWITCHEROO_ON) {
1193 		pr_info("switched on\n");
1194 		/* don't suspend or resume card normally */
1195 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1196 
1197 		pci_set_power_state(dev->pdev, PCI_D0);
1198 		pci_restore_state(dev->pdev);
1199 		r = pci_enable_device(dev->pdev);
1200 		if (r)
1201 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1202 		amdgpu_device_resume(dev, true);
1203 
1204 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1205 		drm_kms_helper_poll_enable(dev);
1206 	} else {
1207 		pr_info("switched off\n");
1208 		drm_kms_helper_poll_disable(dev);
1209 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1210 		amdgpu_device_suspend(dev, true);
1211 		pci_save_state(dev->pdev);
1212 		/* Shut down the device */
1213 		pci_disable_device(dev->pdev);
1214 		pci_set_power_state(dev->pdev, PCI_D3cold);
1215 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1216 	}
1217 }
1218 
1219 /**
1220  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1221  *
1222  * @pdev: pci dev pointer
1223  *
 * Callback for the switcheroo driver.  Check if the switcheroo
 * state can be changed.
1226  * Returns true if the state can be changed, false if not.
1227  */
1228 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1229 {
1230 	struct drm_device *dev = pci_get_drvdata(pdev);
1231 
1232 	/*
1233 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1234 	* locking inversion with the driver load path. And the access here is
1235 	* completely racy anyway. So don't bother with locking for now.
1236 	*/
1237 	return atomic_read(&dev->open_count) == 0;
1238 }
1239 
1240 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1241 	.set_gpu_state = amdgpu_switcheroo_set_state,
1242 	.reprobe = NULL,
1243 	.can_switch = amdgpu_switcheroo_can_switch,
1244 };
1245 
1246 /**
1247  * amdgpu_device_ip_set_clockgating_state - set the CG state
1248  *
1249  * @dev: amdgpu_device pointer
1250  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1251  * @state: clockgating state (gate or ungate)
1252  *
1253  * Sets the requested clockgating state for all instances of
1254  * the hardware IP specified.
1255  * Returns the error code from the last instance.
1256  */
1257 int amdgpu_device_ip_set_clockgating_state(void *dev,
1258 					   enum amd_ip_block_type block_type,
1259 					   enum amd_clockgating_state state)
1260 {
1261 	struct amdgpu_device *adev = dev;
1262 	int i, r = 0;
1263 
1264 	for (i = 0; i < adev->num_ip_blocks; i++) {
1265 		if (!adev->ip_blocks[i].status.valid)
1266 			continue;
1267 		if (adev->ip_blocks[i].version->type != block_type)
1268 			continue;
1269 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1270 			continue;
1271 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1272 			(void *)adev, state);
1273 		if (r)
1274 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1275 				  adev->ip_blocks[i].version->funcs->name, r);
1276 	}
1277 	return r;
1278 }
1279 
1280 /**
1281  * amdgpu_device_ip_set_powergating_state - set the PG state
1282  *
1283  * @dev: amdgpu_device pointer
1284  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1285  * @state: powergating state (gate or ungate)
1286  *
1287  * Sets the requested powergating state for all instances of
1288  * the hardware IP specified.
1289  * Returns the error code from the last instance.
1290  */
1291 int amdgpu_device_ip_set_powergating_state(void *dev,
1292 					   enum amd_ip_block_type block_type,
1293 					   enum amd_powergating_state state)
1294 {
1295 	struct amdgpu_device *adev = dev;
1296 	int i, r = 0;
1297 
1298 	for (i = 0; i < adev->num_ip_blocks; i++) {
1299 		if (!adev->ip_blocks[i].status.valid)
1300 			continue;
1301 		if (adev->ip_blocks[i].version->type != block_type)
1302 			continue;
1303 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1304 			continue;
1305 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1306 			(void *)adev, state);
1307 		if (r)
1308 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1309 				  adev->ip_blocks[i].version->funcs->name, r);
1310 	}
1311 	return r;
1312 }
1313 
1314 /**
1315  * amdgpu_device_ip_get_clockgating_state - get the CG state
1316  *
1317  * @adev: amdgpu_device pointer
1318  * @flags: clockgating feature flags
1319  *
1320  * Walks the list of IPs on the device and updates the clockgating
1321  * flags for each IP.
1322  * Updates @flags with the feature flags for each hardware IP where
1323  * clockgating is enabled.
1324  */
1325 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1326 					    u32 *flags)
1327 {
1328 	int i;
1329 
1330 	for (i = 0; i < adev->num_ip_blocks; i++) {
1331 		if (!adev->ip_blocks[i].status.valid)
1332 			continue;
1333 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1334 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1335 	}
1336 }
1337 
1338 /**
1339  * amdgpu_device_ip_wait_for_idle - wait for idle
1340  *
1341  * @adev: amdgpu_device pointer
1342  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1343  *
 * Waits for the requested hardware IP to be idle.
1345  * Returns 0 for success or a negative error code on failure.
1346  */
1347 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1348 				   enum amd_ip_block_type block_type)
1349 {
1350 	int i, r;
1351 
1352 	for (i = 0; i < adev->num_ip_blocks; i++) {
1353 		if (!adev->ip_blocks[i].status.valid)
1354 			continue;
1355 		if (adev->ip_blocks[i].version->type == block_type) {
1356 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1357 			if (r)
1358 				return r;
1359 			break;
1360 		}
1361 	}
1362 	return 0;
1363 
1364 }
1365 
1366 /**
1367  * amdgpu_device_ip_is_idle - is the hardware IP idle
1368  *
1369  * @adev: amdgpu_device pointer
1370  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371  *
1372  * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1374  */
1375 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1376 			      enum amd_ip_block_type block_type)
1377 {
1378 	int i;
1379 
1380 	for (i = 0; i < adev->num_ip_blocks; i++) {
1381 		if (!adev->ip_blocks[i].status.valid)
1382 			continue;
1383 		if (adev->ip_blocks[i].version->type == block_type)
1384 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1385 	}
1386 	return true;
1387 
1388 }
1389 
1390 /**
1391  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1392  *
1393  * @adev: amdgpu_device pointer
1394  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1395  *
1396  * Returns a pointer to the hardware IP block structure
1397  * if it exists for the asic, otherwise NULL.
1398  */
1399 struct amdgpu_ip_block *
1400 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1401 			      enum amd_ip_block_type type)
1402 {
1403 	int i;
1404 
1405 	for (i = 0; i < adev->num_ip_blocks; i++)
1406 		if (adev->ip_blocks[i].version->type == type)
1407 			return &adev->ip_blocks[i];
1408 
1409 	return NULL;
1410 }
1411 
1412 /**
1413  * amdgpu_device_ip_block_version_cmp
1414  *
1415  * @adev: amdgpu_device pointer
1416  * @type: enum amd_ip_block_type
1417  * @major: major version
1418  * @minor: minor version
1419  *
 * Returns 0 if the IP block version is equal or greater,
 * 1 if it is smaller or the ip_block doesn't exist.
1422  */
1423 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1424 				       enum amd_ip_block_type type,
1425 				       u32 major, u32 minor)
1426 {
1427 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1428 
1429 	if (ip_block && ((ip_block->version->major > major) ||
1430 			((ip_block->version->major == major) &&
1431 			(ip_block->version->minor >= minor))))
1432 		return 0;
1433 
1434 	return 1;
1435 }
1436 
1437 /**
1438  * amdgpu_device_ip_block_add
1439  *
1440  * @adev: amdgpu_device pointer
1441  * @ip_block_version: pointer to the IP to add
1442  *
1443  * Adds the IP block driver information to the collection of IPs
1444  * on the asic.
1445  */
1446 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1447 			       const struct amdgpu_ip_block_version *ip_block_version)
1448 {
1449 	if (!ip_block_version)
1450 		return -EINVAL;
1451 
1452 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1453 		  ip_block_version->funcs->name);
1454 
1455 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1456 
1457 	return 0;
1458 }
1459 
1460 /**
1461  * amdgpu_device_enable_virtual_display - enable virtual display feature
1462  *
1463  * @adev: amdgpu_device pointer
1464  *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
1470  * virtual connectors, crtcs, etc.) specified.
1471  */
1472 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1473 {
1474 	adev->enable_virtual_display = false;
1475 
1476 	if (amdgpu_virtual_display) {
1477 		struct drm_device *ddev = adev->ddev;
1478 		const char *pci_address_name = pci_name(ddev->pdev);
1479 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1480 
1481 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1482 		pciaddstr_tmp = pciaddstr;
1483 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1484 			pciaddname = strsep(&pciaddname_tmp, ",");
1485 			if (!strcmp("all", pciaddname)
1486 			    || !strcmp(pci_address_name, pciaddname)) {
1487 				long num_crtc;
1488 				int res = -1;
1489 
1490 				adev->enable_virtual_display = true;
1491 
1492 				if (pciaddname_tmp)
1493 					res = kstrtol(pciaddname_tmp, 10,
1494 						      &num_crtc);
1495 
1496 				if (!res) {
1497 					if (num_crtc < 1)
1498 						num_crtc = 1;
1499 					if (num_crtc > 6)
1500 						num_crtc = 6;
1501 					adev->mode_info.num_crtc = num_crtc;
1502 				} else {
1503 					adev->mode_info.num_crtc = 1;
1504 				}
1505 				break;
1506 			}
1507 		}
1508 
1509 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1510 			 amdgpu_virtual_display, pci_address_name,
1511 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1512 
1513 		kfree(pciaddstr);
1514 	}
1515 }
1516 
1517 /**
1518  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1519  *
1520  * @adev: amdgpu_device pointer
1521  *
1522  * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1524  * the asic.
1525  * Returns 0 on success, -EINVAL on failure.
1526  */
1527 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1528 {
1529 	const char *chip_name;
1530 	char fw_name[30];
1531 	int err;
1532 	const struct gpu_info_firmware_header_v1_0 *hdr;
1533 
1534 	adev->firmware.gpu_info_fw = NULL;
1535 
1536 	switch (adev->asic_type) {
1537 	case CHIP_TOPAZ:
1538 	case CHIP_TONGA:
1539 	case CHIP_FIJI:
1540 	case CHIP_POLARIS10:
1541 	case CHIP_POLARIS11:
1542 	case CHIP_POLARIS12:
1543 	case CHIP_VEGAM:
1544 	case CHIP_CARRIZO:
1545 	case CHIP_STONEY:
1546 #ifdef CONFIG_DRM_AMDGPU_SI
1547 	case CHIP_VERDE:
1548 	case CHIP_TAHITI:
1549 	case CHIP_PITCAIRN:
1550 	case CHIP_OLAND:
1551 	case CHIP_HAINAN:
1552 #endif
1553 #ifdef CONFIG_DRM_AMDGPU_CIK
1554 	case CHIP_BONAIRE:
1555 	case CHIP_HAWAII:
1556 	case CHIP_KAVERI:
1557 	case CHIP_KABINI:
1558 	case CHIP_MULLINS:
1559 #endif
1560 	case CHIP_VEGA20:
1561 	default:
1562 		return 0;
1563 	case CHIP_VEGA10:
1564 		chip_name = "vega10";
1565 		break;
1566 	case CHIP_VEGA12:
1567 		chip_name = "vega12";
1568 		break;
1569 	case CHIP_RAVEN:
1570 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1571 			chip_name = "raven2";
1572 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1573 			chip_name = "picasso";
1574 		else
1575 			chip_name = "raven";
1576 		break;
1577 	case CHIP_ARCTURUS:
1578 		chip_name = "arcturus";
1579 		break;
1580 	case CHIP_RENOIR:
1581 		chip_name = "renoir";
1582 		break;
1583 	case CHIP_NAVI10:
1584 		chip_name = "navi10";
1585 		break;
1586 	case CHIP_NAVI14:
1587 		chip_name = "navi14";
1588 		break;
1589 	case CHIP_NAVI12:
1590 		chip_name = "navi12";
1591 		break;
1592 	}
1593 
1594 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1595 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1596 	if (err) {
1597 		dev_err(adev->dev,
1598 			"Failed to load gpu_info firmware \"%s\"\n",
1599 			fw_name);
1600 		goto out;
1601 	}
1602 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1603 	if (err) {
1604 		dev_err(adev->dev,
1605 			"Failed to validate gpu_info firmware \"%s\"\n",
1606 			fw_name);
1607 		goto out;
1608 	}
1609 
1610 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1611 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1612 
1613 	switch (hdr->version_major) {
1614 	case 1:
1615 	{
1616 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1617 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1618 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1619 
1620 		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
1621 			amdgpu_discovery_get_gfx_info(adev);
1622 			goto parse_soc_bounding_box;
1623 		}
1624 
1625 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1626 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1627 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1628 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1629 		adev->gfx.config.max_texture_channel_caches =
1630 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1631 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1632 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1633 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1634 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1635 		adev->gfx.config.double_offchip_lds_buf =
1636 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1637 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1638 		adev->gfx.cu_info.max_waves_per_simd =
1639 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1640 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1641 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1642 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1643 		if (hdr->version_minor >= 1) {
1644 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1645 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1646 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1647 			adev->gfx.config.num_sc_per_sh =
1648 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1649 			adev->gfx.config.num_packer_per_sc =
1650 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1651 		}
1652 
1653 parse_soc_bounding_box:
1654 		/*
		 * soc bounding box info is not integrated in the discovery table,
1656 		 * we always need to parse it from gpu info firmware.
1657 		 */
1658 		if (hdr->version_minor == 2) {
1659 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1660 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1661 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1662 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1663 		}
1664 		break;
1665 	}
1666 	default:
1667 		dev_err(adev->dev,
1668 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1669 		err = -EINVAL;
1670 		goto out;
1671 	}
1672 out:
1673 	return err;
1674 }
1675 
1676 /**
1677  * amdgpu_device_ip_early_init - run early init for hardware IPs
1678  *
1679  * @adev: amdgpu_device pointer
1680  *
1681  * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
1683  * is the first stage in initializing the asic.
1684  * Returns 0 on success, negative error code on failure.
1685  */
1686 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1687 {
1688 	int i, r;
1689 
1690 	amdgpu_device_enable_virtual_display(adev);
1691 
1692 	switch (adev->asic_type) {
1693 	case CHIP_TOPAZ:
1694 	case CHIP_TONGA:
1695 	case CHIP_FIJI:
1696 	case CHIP_POLARIS10:
1697 	case CHIP_POLARIS11:
1698 	case CHIP_POLARIS12:
1699 	case CHIP_VEGAM:
1700 	case CHIP_CARRIZO:
1701 	case CHIP_STONEY:
1702 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1703 			adev->family = AMDGPU_FAMILY_CZ;
1704 		else
1705 			adev->family = AMDGPU_FAMILY_VI;
1706 
1707 		r = vi_set_ip_blocks(adev);
1708 		if (r)
1709 			return r;
1710 		break;
1711 #ifdef CONFIG_DRM_AMDGPU_SI
1712 	case CHIP_VERDE:
1713 	case CHIP_TAHITI:
1714 	case CHIP_PITCAIRN:
1715 	case CHIP_OLAND:
1716 	case CHIP_HAINAN:
1717 		adev->family = AMDGPU_FAMILY_SI;
1718 		r = si_set_ip_blocks(adev);
1719 		if (r)
1720 			return r;
1721 		break;
1722 #endif
1723 #ifdef CONFIG_DRM_AMDGPU_CIK
1724 	case CHIP_BONAIRE:
1725 	case CHIP_HAWAII:
1726 	case CHIP_KAVERI:
1727 	case CHIP_KABINI:
1728 	case CHIP_MULLINS:
1729 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1730 			adev->family = AMDGPU_FAMILY_CI;
1731 		else
1732 			adev->family = AMDGPU_FAMILY_KV;
1733 
1734 		r = cik_set_ip_blocks(adev);
1735 		if (r)
1736 			return r;
1737 		break;
1738 #endif
1739 	case CHIP_VEGA10:
1740 	case CHIP_VEGA12:
1741 	case CHIP_VEGA20:
1742 	case CHIP_RAVEN:
1743 	case CHIP_ARCTURUS:
1744 	case CHIP_RENOIR:
1745 		if (adev->asic_type == CHIP_RAVEN ||
1746 		    adev->asic_type == CHIP_RENOIR)
1747 			adev->family = AMDGPU_FAMILY_RV;
1748 		else
1749 			adev->family = AMDGPU_FAMILY_AI;
1750 
1751 		r = soc15_set_ip_blocks(adev);
1752 		if (r)
1753 			return r;
1754 		break;
1755 	case  CHIP_NAVI10:
1756 	case  CHIP_NAVI14:
1757 	case  CHIP_NAVI12:
1758 		adev->family = AMDGPU_FAMILY_NV;
1759 
1760 		r = nv_set_ip_blocks(adev);
1761 		if (r)
1762 			return r;
1763 		break;
1764 	default:
1765 		/* FIXME: not supported yet */
1766 		return -EINVAL;
1767 	}
1768 
1769 	amdgpu_amdkfd_device_probe(adev);
1770 
1771 	if (amdgpu_sriov_vf(adev)) {
		/* handle vbios stuff prior to full access mode for the new handshake */
1773 		if (adev->virt.req_init_data_ver == 1) {
1774 			if (!amdgpu_get_bios(adev)) {
1775 				DRM_ERROR("failed to get vbios\n");
1776 				return -EINVAL;
1777 			}
1778 
1779 			r = amdgpu_atombios_init(adev);
1780 			if (r) {
1781 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1782 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1783 				return r;
1784 			}
1785 		}
1786 	}
1787 
	/* we need to send REQ_GPU here for the legacy handshake, otherwise the
	 * vbios will not be prepared by the host for this VF */
1790 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
1791 		r = amdgpu_virt_request_full_gpu(adev, true);
1792 		if (r)
1793 			return r;
1794 	}
1795 
1796 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1797 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1798 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1799 
1800 	for (i = 0; i < adev->num_ip_blocks; i++) {
1801 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1802 			DRM_ERROR("disabled ip block: %d <%s>\n",
1803 				  i, adev->ip_blocks[i].version->funcs->name);
1804 			adev->ip_blocks[i].status.valid = false;
1805 		} else {
1806 			if (adev->ip_blocks[i].version->funcs->early_init) {
1807 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1808 				if (r == -ENOENT) {
1809 					adev->ip_blocks[i].status.valid = false;
1810 				} else if (r) {
1811 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1812 						  adev->ip_blocks[i].version->funcs->name, r);
1813 					return r;
1814 				} else {
1815 					adev->ip_blocks[i].status.valid = true;
1816 				}
1817 			} else {
1818 				adev->ip_blocks[i].status.valid = true;
1819 			}
1820 		}
1821 		/* get the vbios after the asic_funcs are set up */
1822 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1823 			r = amdgpu_device_parse_gpu_info_fw(adev);
1824 			if (r)
1825 				return r;
1826 
1827 			/* skip vbios handling for new handshake */
1828 			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1829 				continue;
1830 
1831 			/* Read BIOS */
1832 			if (!amdgpu_get_bios(adev))
1833 				return -EINVAL;
1834 
1835 			r = amdgpu_atombios_init(adev);
1836 			if (r) {
1837 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1838 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1839 				return r;
1840 			}
1841 		}
1842 	}
1843 
1844 	adev->cg_flags &= amdgpu_cg_mask;
1845 	adev->pg_flags &= amdgpu_pg_mask;
1846 
1847 	return 0;
1848 }
1849 
1850 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1851 {
1852 	int i, r;
1853 
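	/*
	 * Phase 1 only brings up the COMMON and IH blocks (plus PSP when
	 * running under SR-IOV); the remaining blocks are initialized in
	 * phase 2, after firmware loading.
	 */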
1854 	for (i = 0; i < adev->num_ip_blocks; i++) {
1855 		if (!adev->ip_blocks[i].status.sw)
1856 			continue;
1857 		if (adev->ip_blocks[i].status.hw)
1858 			continue;
1859 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1860 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1861 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1862 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1863 			if (r) {
1864 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1865 					  adev->ip_blocks[i].version->funcs->name, r);
1866 				return r;
1867 			}
1868 			adev->ip_blocks[i].status.hw = true;
1869 		}
1870 	}
1871 
1872 	return 0;
1873 }
1874 
1875 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1876 {
1877 	int i, r;
1878 
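	/*
	 * Phase 2 brings up every remaining block that has completed sw_init
	 * but was not initialized during phase 1 or firmware loading.
	 */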
1879 	for (i = 0; i < adev->num_ip_blocks; i++) {
1880 		if (!adev->ip_blocks[i].status.sw)
1881 			continue;
1882 		if (adev->ip_blocks[i].status.hw)
1883 			continue;
1884 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1885 		if (r) {
1886 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1887 				  adev->ip_blocks[i].version->funcs->name, r);
1888 			return r;
1889 		}
1890 		adev->ip_blocks[i].status.hw = true;
1891 	}
1892 
1893 	return 0;
1894 }
1895 
1896 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1897 {
1898 	int r = 0;
1899 	int i;
1900 	uint32_t smu_version;
1901 
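	/*
	 * On VEGA10 and newer, firmware is loaded through the PSP block, so
	 * bring PSP up (or resume it after a reset/suspend) before the rest
	 * of the IP blocks are initialized.
	 */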
1902 	if (adev->asic_type >= CHIP_VEGA10) {
1903 		for (i = 0; i < adev->num_ip_blocks; i++) {
1904 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1905 				continue;
1906 
1907 			/* no need to do the fw loading again if already done */
1908 			if (adev->ip_blocks[i].status.hw == true)
1909 				break;
1910 
1911 			if (adev->in_gpu_reset || adev->in_suspend) {
1912 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1913 				if (r) {
1914 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1915 							  adev->ip_blocks[i].version->funcs->name, r);
1916 					return r;
1917 				}
1918 			} else {
1919 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1920 				if (r) {
1921 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1922 							  adev->ip_blocks[i].version->funcs->name, r);
1923 					return r;
1924 				}
1925 			}
1926 
1927 			adev->ip_blocks[i].status.hw = true;
1928 			break;
1929 		}
1930 	}
1931 
1932 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1933 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1934 
1935 	return r;
1936 }
1937 
1938 /**
1939  * amdgpu_device_ip_init - run init for hardware IPs
1940  *
1941  * @adev: amdgpu_device pointer
1942  *
1943  * Main initialization pass for hardware IPs.  The list of all the hardware
1944  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1945  * are run.  sw_init initializes the software state associated with each IP
1946  * and hw_init initializes the hardware associated with each IP.
1947  * Returns 0 on success, negative error code on failure.
1948  */
1949 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1950 {
1951 	int i, r;
1952 
1953 	r = amdgpu_ras_init(adev);
1954 	if (r)
1955 		return r;
1956 
1957 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1958 		r = amdgpu_virt_request_full_gpu(adev, true);
1959 		if (r)
1960 			return -EAGAIN;
1961 	}
1962 
1963 	for (i = 0; i < adev->num_ip_blocks; i++) {
1964 		if (!adev->ip_blocks[i].status.valid)
1965 			continue;
1966 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1967 		if (r) {
1968 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1969 				  adev->ip_blocks[i].version->funcs->name, r);
1970 			goto init_failed;
1971 		}
1972 		adev->ip_blocks[i].status.sw = true;
1973 
1974 		/* need to do gmc hw init early so we can allocate gpu mem */
1975 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1976 			r = amdgpu_device_vram_scratch_init(adev);
1977 			if (r) {
1978 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1979 				goto init_failed;
1980 			}
1981 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1982 			if (r) {
1983 				DRM_ERROR("hw_init %d failed %d\n", i, r);
1984 				goto init_failed;
1985 			}
1986 			r = amdgpu_device_wb_init(adev);
1987 			if (r) {
1988 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1989 				goto init_failed;
1990 			}
1991 			adev->ip_blocks[i].status.hw = true;
1992 
1993 			/* right after GMC hw init, we create CSA */
1994 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1995 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1996 								AMDGPU_GEM_DOMAIN_VRAM,
1997 								AMDGPU_CSA_SIZE);
1998 				if (r) {
1999 					DRM_ERROR("allocate CSA failed %d\n", r);
2000 					goto init_failed;
2001 				}
2002 			}
2003 		}
2004 	}
2005 
2006 	if (amdgpu_sriov_vf(adev))
2007 		amdgpu_virt_init_data_exchange(adev);
2008 
2009 	r = amdgpu_ib_pool_init(adev);
2010 	if (r) {
2011 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2012 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2013 		goto init_failed;
2014 	}
2015 
2016 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo once sw_init completes */
2017 	if (r)
2018 		goto init_failed;
2019 
2020 	r = amdgpu_device_ip_hw_init_phase1(adev);
2021 	if (r)
2022 		goto init_failed;
2023 
2024 	r = amdgpu_device_fw_loading(adev);
2025 	if (r)
2026 		goto init_failed;
2027 
2028 	r = amdgpu_device_ip_hw_init_phase2(adev);
2029 	if (r)
2030 		goto init_failed;
2031 
2032 	/*
2033 	 * Retired pages will be loaded from eeprom and reserved here.
2034 	 * This must be called after amdgpu_device_ip_hw_init_phase2 since,
2035 	 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
2036 	 * functional for I2C communication, which is only true at this point.
2037 	 * recovery_init may fail, but it can free all resources allocated by
2038 	 * itself and its failure should not stop the amdgpu init process.
2039 	 *
2040 	 * Note: theoretically, this should be called before all vram allocations
2041 	 * to protect retired pages from being abused.
2042 	 */
2043 	amdgpu_ras_recovery_init(adev);
2044 
2045 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2046 		amdgpu_xgmi_add_device(adev);
2047 	amdgpu_amdkfd_device_init(adev);
2048 
2049 	amdgpu_fru_get_product_info(adev);
2050 
2051 init_failed:
2052 	if (amdgpu_sriov_vf(adev))
2053 		amdgpu_virt_release_full_gpu(adev, true);
2054 
2055 	return r;
2056 }
2057 
2058 /**
2059  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2060  *
2061  * @adev: amdgpu_device pointer
2062  *
2063  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2064  * this function before a GPU reset.  If the value is retained after a
2065  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2066  */
2067 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2068 {
2069 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2070 }
2071 
2072 /**
2073  * amdgpu_device_check_vram_lost - check if vram is valid
2074  *
2075  * @adev: amdgpu_device pointer
2076  *
2077  * Checks the reset magic value written to the gart pointer in VRAM.
2078  * The driver calls this after a GPU reset to see if the contents of
2079  * VRAM have been lost or not.
2080  * Returns true if vram is lost, false if not.
2081  */
2082 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2083 {
2084 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2085 			AMDGPU_RESET_MAGIC_NUM))
2086 		return true;
2087 
2088 	if (!adev->in_gpu_reset)
2089 		return false;
2090 
2091 	/*
2092 	 * For all ASICs with baco/mode1 reset, the VRAM is
2093 	 * always assumed to be lost.
2094 	 */
2095 	switch (amdgpu_asic_reset_method(adev)) {
2096 	case AMD_RESET_METHOD_BACO:
2097 	case AMD_RESET_METHOD_MODE1:
2098 		return true;
2099 	default:
2100 		return false;
2101 	}
2102 }
2103 
2104 /**
2105  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2106  *
2107  * @adev: amdgpu_device pointer
2108  * @state: clockgating state (gate or ungate)
2109  *
2110  * The list of all the hardware IPs that make up the asic is walked and the
2111  * set_clockgating_state callbacks are run.
2112  * During late initialization this pass enables clockgating for the
2113  * hardware IPs; during fini or suspend it disables clockgating.
2114  * Returns 0 on success, negative error code on failure.
2115  */
2116 
2117 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2118 						enum amd_clockgating_state state)
2119 {
2120 	int i, j, r;
2121 
2122 	if (amdgpu_emu_mode == 1)
2123 		return 0;
2124 
2125 	for (j = 0; j < adev->num_ip_blocks; j++) {
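		/* gate in normal IP order, ungate in reverse order */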
2126 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2127 		if (!adev->ip_blocks[i].status.late_initialized)
2128 			continue;
2129 		/* skip CG for UVD/VCE/VCN/JPEG, it's handled specially */
2130 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2131 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2132 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2133 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2134 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2135 			/* enable clockgating to save power */
2136 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2137 										     state);
2138 			if (r) {
2139 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2140 					  adev->ip_blocks[i].version->funcs->name, r);
2141 				return r;
2142 			}
2143 		}
2144 	}
2145 
2146 	return 0;
2147 }
2148 
2149 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
2150 {
2151 	int i, j, r;
2152 
2153 	if (amdgpu_emu_mode == 1)
2154 		return 0;
2155 
2156 	for (j = 0; j < adev->num_ip_blocks; j++) {
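		/* gate in normal IP order, ungate in reverse order */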
2157 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2158 		if (!adev->ip_blocks[i].status.late_initialized)
2159 			continue;
2160 		/* skip PG for UVD/VCE/VCN/JPEG, it's handled specially */
2161 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2162 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2163 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2164 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2165 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2166 			/* enable powergating to save power */
2167 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2168 											state);
2169 			if (r) {
2170 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2171 					  adev->ip_blocks[i].version->funcs->name, r);
2172 				return r;
2173 			}
2174 		}
2175 	}
2176 	return 0;
2177 }
2178 
2179 static int amdgpu_device_enable_mgpu_fan_boost(void)
2180 {
2181 	struct amdgpu_gpu_instance *gpu_ins;
2182 	struct amdgpu_device *adev;
2183 	int i, ret = 0;
2184 
2185 	mutex_lock(&mgpu_info.mutex);
2186 
2187 	/*
2188 	 * MGPU fan boost feature should be enabled
2189 	 * only when there are two or more dGPUs in
2190 	 * the system
2191 	 */
2192 	if (mgpu_info.num_dgpu < 2)
2193 		goto out;
2194 
2195 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2196 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2197 		adev = gpu_ins->adev;
2198 		if (!(adev->flags & AMD_IS_APU) &&
2199 		    !gpu_ins->mgpu_fan_enabled &&
2200 		    adev->powerplay.pp_funcs &&
2201 		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2202 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2203 			if (ret)
2204 				break;
2205 
2206 			gpu_ins->mgpu_fan_enabled = 1;
2207 		}
2208 	}
2209 
2210 out:
2211 	mutex_unlock(&mgpu_info.mutex);
2212 
2213 	return ret;
2214 }
2215 
2216 /**
2217  * amdgpu_device_ip_late_init - run late init for hardware IPs
2218  *
2219  * @adev: amdgpu_device pointer
2220  *
2221  * Late initialization pass for hardware IPs.  The list of all the hardware
2222  * IPs that make up the asic is walked and the late_init callbacks are run.
2223  * late_init covers any special initialization that an IP requires
2224  * after all of them have been initialized or something that needs to happen
2225  * late in the init process.
2226  * Returns 0 on success, negative error code on failure.
2227  */
2228 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2229 {
2230 	struct amdgpu_gpu_instance *gpu_instance;
2231 	int i = 0, r;
2232 
2233 	for (i = 0; i < adev->num_ip_blocks; i++) {
2234 		if (!adev->ip_blocks[i].status.hw)
2235 			continue;
2236 		if (adev->ip_blocks[i].version->funcs->late_init) {
2237 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2238 			if (r) {
2239 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2240 					  adev->ip_blocks[i].version->funcs->name, r);
2241 				return r;
2242 			}
2243 		}
2244 		adev->ip_blocks[i].status.late_initialized = true;
2245 	}
2246 
2247 	amdgpu_ras_set_error_query_ready(adev, true);
2248 
2249 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2250 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2251 
2252 	amdgpu_device_fill_reset_magic(adev);
2253 
2254 	r = amdgpu_device_enable_mgpu_fan_boost();
2255 	if (r)
2256 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2257 
2258 
2259 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2260 		mutex_lock(&mgpu_info.mutex);
2261 
2262 		/*
2263 		 * Reset the device p-state to low, as it was booted with high.
2264 		 *
2265 		 * This should be performed only after all devices from the same
2266 		 * hive have been initialized.
2267 		 *
2268 		 * However, the number of devices in the hive is not known in
2269 		 * advance; it is counted one by one as the devices initialize.
2270 		 *
2271 		 * So we wait until all XGMI interlinked devices are initialized.
2272 		 * This may add some delay, as those devices may come from
2273 		 * different hives. But that should be OK.
2274 		 */
2275 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2276 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2277 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2278 				if (gpu_instance->adev->flags & AMD_IS_APU)
2279 					continue;
2280 
2281 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2282 						AMDGPU_XGMI_PSTATE_MIN);
2283 				if (r) {
2284 					DRM_ERROR("pstate setting failed (%d).\n", r);
2285 					break;
2286 				}
2287 			}
2288 		}
2289 
2290 		mutex_unlock(&mgpu_info.mutex);
2291 	}
2292 
2293 	return 0;
2294 }
2295 
2296 /**
2297  * amdgpu_device_ip_fini - run fini for hardware IPs
2298  *
2299  * @adev: amdgpu_device pointer
2300  *
2301  * Main teardown pass for hardware IPs.  The list of all the hardware
2302  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2303  * are run.  hw_fini tears down the hardware associated with each IP
2304  * and sw_fini tears down any software state associated with each IP.
2305  * Returns 0 on success, negative error code on failure.
2306  */
2307 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2308 {
2309 	int i, r;
2310 
2311 	amdgpu_ras_pre_fini(adev);
2312 
2313 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2314 		amdgpu_xgmi_remove_device(adev);
2315 
2316 	amdgpu_amdkfd_device_fini(adev);
2317 
2318 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2319 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2320 
2321 	/* need to disable SMC first */
2322 	for (i = 0; i < adev->num_ip_blocks; i++) {
2323 		if (!adev->ip_blocks[i].status.hw)
2324 			continue;
2325 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2326 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2327 			/* XXX handle errors */
2328 			if (r) {
2329 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2330 					  adev->ip_blocks[i].version->funcs->name, r);
2331 			}
2332 			adev->ip_blocks[i].status.hw = false;
2333 			break;
2334 		}
2335 	}
2336 
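	/* hw_fini for the remaining blocks runs in reverse IP order */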
2337 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2338 		if (!adev->ip_blocks[i].status.hw)
2339 			continue;
2340 
2341 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2342 		/* XXX handle errors */
2343 		if (r) {
2344 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2345 				  adev->ip_blocks[i].version->funcs->name, r);
2346 		}
2347 
2348 		adev->ip_blocks[i].status.hw = false;
2349 	}
2350 
2351 
2352 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2353 		if (!adev->ip_blocks[i].status.sw)
2354 			continue;
2355 
2356 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2357 			amdgpu_ucode_free_bo(adev);
2358 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2359 			amdgpu_device_wb_fini(adev);
2360 			amdgpu_device_vram_scratch_fini(adev);
2361 			amdgpu_ib_pool_fini(adev);
2362 		}
2363 
2364 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2365 		/* XXX handle errors */
2366 		if (r) {
2367 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2368 				  adev->ip_blocks[i].version->funcs->name, r);
2369 		}
2370 		adev->ip_blocks[i].status.sw = false;
2371 		adev->ip_blocks[i].status.valid = false;
2372 	}
2373 
2374 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2375 		if (!adev->ip_blocks[i].status.late_initialized)
2376 			continue;
2377 		if (adev->ip_blocks[i].version->funcs->late_fini)
2378 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2379 		adev->ip_blocks[i].status.late_initialized = false;
2380 	}
2381 
2382 	amdgpu_ras_fini(adev);
2383 
2384 	if (amdgpu_sriov_vf(adev))
2385 		if (amdgpu_virt_release_full_gpu(adev, false))
2386 			DRM_ERROR("failed to release exclusive mode on fini\n");
2387 
2388 	return 0;
2389 }
2390 
2391 /**
2392  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2393  *
2394  * @work: work_struct.
2395  */
2396 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2397 {
2398 	struct amdgpu_device *adev =
2399 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2400 	int r;
2401 
2402 	r = amdgpu_ib_ring_tests(adev);
2403 	if (r)
2404 		DRM_ERROR("ib ring test failed (%d).\n", r);
2405 }
2406 
2407 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2408 {
2409 	struct amdgpu_device *adev =
2410 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2411 
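	/* only enter GFXOFF when no request is holding it off and it is not
	 * already enabled */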
2412 	mutex_lock(&adev->gfx.gfx_off_mutex);
2413 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2414 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2415 			adev->gfx.gfx_off_state = true;
2416 	}
2417 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2418 }
2419 
2420 /**
2421  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2422  *
2423  * @adev: amdgpu_device pointer
2424  *
2425  * First phase of hardware IP suspend.  Clockgating and powergating are
2426  * disabled and the suspend callbacks are run for the display (DCE) blocks
2427  * only.  suspend puts the hardware and software state in each IP into a
2428  * state suitable for suspend.  The remaining IPs are handled in phase 2.
2429  * Returns 0 on success, negative error code on failure.
2430  */
2431 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2432 {
2433 	int i, r;
2434 
2435 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2436 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2437 
2438 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2439 		if (!adev->ip_blocks[i].status.valid)
2440 			continue;
2441 		/* displays are handled separately */
2442 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2443 			/* XXX handle errors */
2444 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2445 			/* XXX handle errors */
2446 			if (r) {
2447 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2448 					  adev->ip_blocks[i].version->funcs->name, r);
2449 				return r;
2450 			}
2451 			adev->ip_blocks[i].status.hw = false;
2452 		}
2453 	}
2454 
2455 	return 0;
2456 }
2457 
2458 /**
2459  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2460  *
2461  * @adev: amdgpu_device pointer
2462  *
2463  * Second phase of hardware IP suspend.  The suspend callbacks are run for
2464  * all IP blocks except the display (DCE) blocks, which were handled in
2465  * phase 1.  suspend puts the hardware and software state in each IP into
2466  * a state suitable for suspend.
2467  * Returns 0 on success, negative error code on failure.
2468  */
2469 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2470 {
2471 	int i, r;
2472 
2473 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2474 		if (!adev->ip_blocks[i].status.valid)
2475 			continue;
2476 		/* displays are handled in phase1 */
2477 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2478 			continue;
2479 		/* PSP lost connection when err_event_athub occurs */
2480 		if (amdgpu_ras_intr_triggered() &&
2481 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2482 			adev->ip_blocks[i].status.hw = false;
2483 			continue;
2484 		}
2485 		/* XXX handle errors */
2486 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2487 		/* XXX handle errors */
2488 		if (r) {
2489 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2490 				  adev->ip_blocks[i].version->funcs->name, r);
2491 		}
2492 		adev->ip_blocks[i].status.hw = false;
2493 		/* handle putting the SMC in the appropriate state */
2494 		if (!amdgpu_sriov_vf(adev)) {
2495 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2496 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2497 				if (r) {
2498 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2499 							adev->mp1_state, r);
2500 					return r;
2501 				}
2502 			}
2503 		}
2504 		adev->ip_blocks[i].status.hw = false;
2505 	}
2506 
2507 	return 0;
2508 }
2509 
2510 /**
2511  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2512  *
2513  * @adev: amdgpu_device pointer
2514  *
2515  * Main suspend function for hardware IPs.  The list of all the hardware
2516  * IPs that make up the asic is walked, clockgating is disabled and the
2517  * suspend callbacks are run.  suspend puts the hardware and software state
2518  * in each IP into a state suitable for suspend.
2519  * Returns 0 on success, negative error code on failure.
2520  */
2521 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2522 {
2523 	int r;
2524 
2525 	if (amdgpu_sriov_vf(adev))
2526 		amdgpu_virt_request_full_gpu(adev, false);
2527 
2528 	r = amdgpu_device_ip_suspend_phase1(adev);
2529 	if (r)
2530 		return r;
2531 	r = amdgpu_device_ip_suspend_phase2(adev);
2532 
2533 	if (amdgpu_sriov_vf(adev))
2534 		amdgpu_virt_release_full_gpu(adev, false);
2535 
2536 	return r;
2537 }
2538 
2539 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2540 {
2541 	int i, r;
2542 
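	/* on SR-IOV re-init the early blocks are brought back in this fixed order */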
2543 	static enum amd_ip_block_type ip_order[] = {
2544 		AMD_IP_BLOCK_TYPE_GMC,
2545 		AMD_IP_BLOCK_TYPE_COMMON,
2546 		AMD_IP_BLOCK_TYPE_PSP,
2547 		AMD_IP_BLOCK_TYPE_IH,
2548 	};
2549 
2550 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2551 		int j;
2552 		struct amdgpu_ip_block *block;
2553 
2554 		for (j = 0; j < adev->num_ip_blocks; j++) {
2555 			block = &adev->ip_blocks[j];
2556 
2557 			block->status.hw = false;
2558 			if (block->version->type != ip_order[i] ||
2559 				!block->status.valid)
2560 				continue;
2561 
2562 			r = block->version->funcs->hw_init(adev);
2563 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2564 			if (r)
2565 				return r;
2566 			block->status.hw = true;
2567 		}
2568 	}
2569 
2570 	return 0;
2571 }
2572 
2573 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2574 {
2575 	int i, r;
2576 
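	/* the remaining blocks are re-initialized in this fixed order */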
2577 	static enum amd_ip_block_type ip_order[] = {
2578 		AMD_IP_BLOCK_TYPE_SMC,
2579 		AMD_IP_BLOCK_TYPE_DCE,
2580 		AMD_IP_BLOCK_TYPE_GFX,
2581 		AMD_IP_BLOCK_TYPE_SDMA,
2582 		AMD_IP_BLOCK_TYPE_UVD,
2583 		AMD_IP_BLOCK_TYPE_VCE,
2584 		AMD_IP_BLOCK_TYPE_VCN
2585 	};
2586 
2587 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2588 		int j;
2589 		struct amdgpu_ip_block *block;
2590 
2591 		for (j = 0; j < adev->num_ip_blocks; j++) {
2592 			block = &adev->ip_blocks[j];
2593 
2594 			if (block->version->type != ip_order[i] ||
2595 				!block->status.valid ||
2596 				block->status.hw)
2597 				continue;
2598 
2599 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2600 				r = block->version->funcs->resume(adev);
2601 			else
2602 				r = block->version->funcs->hw_init(adev);
2603 
2604 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2605 			if (r)
2606 				return r;
2607 			block->status.hw = true;
2608 		}
2609 	}
2610 
2611 	return 0;
2612 }
2613 
2614 /**
2615  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2616  *
2617  * @adev: amdgpu_device pointer
2618  *
2619  * First resume function for hardware IPs.  The list of all the hardware
2620  * IPs that make up the asic is walked and the resume callbacks are run for
2621  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2622  * after a suspend and updates the software state as necessary.  This
2623  * function is also used for restoring the GPU after a GPU reset.
2624  * Returns 0 on success, negative error code on failure.
2625  */
2626 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2627 {
2628 	int i, r;
2629 
2630 	for (i = 0; i < adev->num_ip_blocks; i++) {
2631 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2632 			continue;
2633 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2634 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2635 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2636 
2637 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2638 			if (r) {
2639 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2640 					  adev->ip_blocks[i].version->funcs->name, r);
2641 				return r;
2642 			}
2643 			adev->ip_blocks[i].status.hw = true;
2644 		}
2645 	}
2646 
2647 	return 0;
2648 }
2649 
2650 /**
2651  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2652  *
2653  * @adev: amdgpu_device pointer
2654  *
2655  * Second resume function for hardware IPs.  The list of all the hardware
2656  * IPs that make up the asic is walked and the resume callbacks are run for
2657  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2658  * functional state after a suspend and updates the software state as
2659  * necessary.  This function is also used for restoring the GPU after a GPU
2660  * reset.
2661  * Returns 0 on success, negative error code on failure.
2662  */
2663 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2664 {
2665 	int i, r;
2666 
2667 	for (i = 0; i < adev->num_ip_blocks; i++) {
2668 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2669 			continue;
2670 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2671 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2672 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2673 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2674 			continue;
2675 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2676 		if (r) {
2677 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2678 				  adev->ip_blocks[i].version->funcs->name, r);
2679 			return r;
2680 		}
2681 		adev->ip_blocks[i].status.hw = true;
2682 	}
2683 
2684 	return 0;
2685 }
2686 
2687 /**
2688  * amdgpu_device_ip_resume - run resume for hardware IPs
2689  *
2690  * @adev: amdgpu_device pointer
2691  *
2692  * Main resume function for hardware IPs.  The hardware IPs
2693  * are split into two resume functions because they are
2694  * also used in recovering from a GPU reset and some additional
2695  * steps need to be taken between them.  In this case (S3/S4) they are
2696  * run sequentially.
2697  * Returns 0 on success, negative error code on failure.
2698  */
2699 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2700 {
2701 	int r;
2702 
2703 	r = amdgpu_device_ip_resume_phase1(adev);
2704 	if (r)
2705 		return r;
2706 
2707 	r = amdgpu_device_fw_loading(adev);
2708 	if (r)
2709 		return r;
2710 
2711 	r = amdgpu_device_ip_resume_phase2(adev);
2712 
2713 	return r;
2714 }
2715 
2716 /**
2717  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2718  *
2719  * @adev: amdgpu_device pointer
2720  *
2721  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2722  */
2723 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2724 {
2725 	if (amdgpu_sriov_vf(adev)) {
2726 		if (adev->is_atom_fw) {
2727 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2728 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2729 		} else {
2730 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2731 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2732 		}
2733 
2734 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2735 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2736 	}
2737 }
2738 
2739 /**
2740  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2741  *
2742  * @asic_type: AMD asic type
2743  *
2744  * Check if there is DC (new modesetting infrastructure) support for an asic.
2745  * Returns true if DC has support, false if not.
2746  */
2747 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2748 {
2749 	switch (asic_type) {
2750 #if defined(CONFIG_DRM_AMD_DC)
2751 	case CHIP_BONAIRE:
2752 	case CHIP_KAVERI:
2753 	case CHIP_KABINI:
2754 	case CHIP_MULLINS:
2755 		/*
2756 		 * We have systems in the wild with these ASICs that require
2757 		 * LVDS and VGA support which is not supported with DC.
2758 		 *
2759 		 * Fallback to the non-DC driver here by default so as not to
2760 		 * cause regressions.
2761 		 */
2762 		return amdgpu_dc > 0;
2763 	case CHIP_HAWAII:
2764 	case CHIP_CARRIZO:
2765 	case CHIP_STONEY:
2766 	case CHIP_POLARIS10:
2767 	case CHIP_POLARIS11:
2768 	case CHIP_POLARIS12:
2769 	case CHIP_VEGAM:
2770 	case CHIP_TONGA:
2771 	case CHIP_FIJI:
2772 	case CHIP_VEGA10:
2773 	case CHIP_VEGA12:
2774 	case CHIP_VEGA20:
2775 #if defined(CONFIG_DRM_AMD_DC_DCN)
2776 	case CHIP_RAVEN:
2777 	case CHIP_NAVI10:
2778 	case CHIP_NAVI14:
2779 	case CHIP_NAVI12:
2780 	case CHIP_RENOIR:
2781 #endif
2782 		return amdgpu_dc != 0;
2783 #endif
2784 	default:
2785 		if (amdgpu_dc > 0)
2786 			DRM_INFO("Display Core has been requested via kernel parameter "
2787 					 "but isn't supported by ASIC, ignoring\n");
2788 		return false;
2789 	}
2790 }
2791 
2792 /**
2793  * amdgpu_device_has_dc_support - check if dc is supported
2794  *
2795  * @adev: amdgpu_device pointer
2796  *
2797  * Returns true for supported, false for not supported
2798  */
2799 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2800 {
2801 	if (amdgpu_sriov_vf(adev))
2802 		return false;
2803 
2804 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2805 }
2806 
2807 
2808 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2809 {
2810 	struct amdgpu_device *adev =
2811 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2812 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
2813 
2814 	/* It's a bug to not have a hive within this function */
2815 	if (WARN_ON(!hive))
2816 		return;
2817 
2818 	/*
2819 	 * Use task barrier to synchronize all xgmi reset works across the
2820 	 * hive. task_barrier_enter and task_barrier_exit will block
2821 	 * until all the threads running the xgmi reset works reach
2822 	 * those points. task_barrier_full will do both blocks.
2823 	 */
2824 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2825 
2826 		task_barrier_enter(&hive->tb);
2827 		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2828 
2829 		if (adev->asic_reset_res)
2830 			goto fail;
2831 
2832 		task_barrier_exit(&hive->tb);
2833 		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2834 
2835 		if (adev->asic_reset_res)
2836 			goto fail;
2837 
2838 		if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2839 			adev->mmhub.funcs->reset_ras_error_count(adev);
2840 	} else {
2841 
2842 		task_barrier_full(&hive->tb);
2843 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
2844 	}
2845 
2846 fail:
2847 	if (adev->asic_reset_res)
2848 		DRM_WARN("ASIC reset failed with error %d for drm dev %s",
2849 			 adev->asic_reset_res, adev->ddev->unique);
2850 }
2851 
2852 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2853 {
2854 	char *input = amdgpu_lockup_timeout;
2855 	char *timeout_setting = NULL;
2856 	int index = 0;
2857 	long timeout;
2858 	int ret = 0;
2859 
2860 	/*
2861 	 * By default the timeout for non-compute jobs is 10000 ms and
2862 	 * there is no timeout enforced on compute jobs.
2863 	 * In SR-IOV or passthrough mode, the default timeout for compute
2864 	 * jobs is 60000 ms.
2865 	 */
2866 	adev->gfx_timeout = msecs_to_jiffies(10000);
2867 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2868 	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2869 		adev->compute_timeout =  msecs_to_jiffies(60000);
2870 	else
2871 		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2872 
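	/*
	 * The lockup_timeout parameter is a comma separated list of up to
	 * four values applied in the order <gfx>,<compute>,<sdma>,<video>.
	 * A value of 0 keeps the default set above, a negative value means
	 * no timeout.  If only a single value is given it is applied to all
	 * non-compute queues.
	 */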
2873 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2874 		while ((timeout_setting = strsep(&input, ",")) &&
2875 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2876 			ret = kstrtol(timeout_setting, 0, &timeout);
2877 			if (ret)
2878 				return ret;
2879 
2880 			if (timeout == 0) {
2881 				index++;
2882 				continue;
2883 			} else if (timeout < 0) {
2884 				timeout = MAX_SCHEDULE_TIMEOUT;
2885 			} else {
2886 				timeout = msecs_to_jiffies(timeout);
2887 			}
2888 
2889 			switch (index++) {
2890 			case 0:
2891 				adev->gfx_timeout = timeout;
2892 				break;
2893 			case 1:
2894 				adev->compute_timeout = timeout;
2895 				break;
2896 			case 2:
2897 				adev->sdma_timeout = timeout;
2898 				break;
2899 			case 3:
2900 				adev->video_timeout = timeout;
2901 				break;
2902 			default:
2903 				break;
2904 			}
2905 		}
2906 		/*
2907 		 * If only one value was specified, it applies to
2908 		 * all non-compute jobs.
2909 		 */
2910 		if (index == 1) {
2911 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2912 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2913 				adev->compute_timeout = adev->gfx_timeout;
2914 		}
2915 	}
2916 
2917 	return ret;
2918 }
2919 
2920 static const struct attribute *amdgpu_dev_attributes[] = {
2921 	&dev_attr_product_name.attr,
2922 	&dev_attr_product_number.attr,
2923 	&dev_attr_serial_number.attr,
2924 	&dev_attr_pcie_replay_count.attr,
2925 	NULL
2926 };
2927 
2928 /**
2929  * amdgpu_device_init - initialize the driver
2930  *
2931  * @adev: amdgpu_device pointer
2932  * @ddev: drm dev pointer
2933  * @pdev: pci dev pointer
2934  * @flags: driver flags
2935  *
2936  * Initializes the driver info and hw (all asics).
2937  * Returns 0 for success or an error on failure.
2938  * Called at driver startup.
2939  */
2940 int amdgpu_device_init(struct amdgpu_device *adev,
2941 		       struct drm_device *ddev,
2942 		       struct pci_dev *pdev,
2943 		       uint32_t flags)
2944 {
2945 	int r, i;
2946 	bool boco = false;
2947 	u32 max_MBps;
2948 
2949 	adev->shutdown = false;
2950 	adev->dev = &pdev->dev;
2951 	adev->ddev = ddev;
2952 	adev->pdev = pdev;
2953 	adev->flags = flags;
2954 
2955 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2956 		adev->asic_type = amdgpu_force_asic_type;
2957 	else
2958 		adev->asic_type = flags & AMD_ASIC_MASK;
2959 
2960 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2961 	if (amdgpu_emu_mode == 1)
2962 		adev->usec_timeout *= 10;
2963 	adev->gmc.gart_size = 512 * 1024 * 1024;
2964 	adev->accel_working = false;
2965 	adev->num_rings = 0;
2966 	adev->mman.buffer_funcs = NULL;
2967 	adev->mman.buffer_funcs_ring = NULL;
2968 	adev->vm_manager.vm_pte_funcs = NULL;
2969 	adev->vm_manager.vm_pte_num_scheds = 0;
2970 	adev->gmc.gmc_funcs = NULL;
2971 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2972 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2973 
2974 	adev->smc_rreg = &amdgpu_invalid_rreg;
2975 	adev->smc_wreg = &amdgpu_invalid_wreg;
2976 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2977 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2978 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2979 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2980 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2981 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2982 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2983 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2984 	adev->didt_rreg = &amdgpu_invalid_rreg;
2985 	adev->didt_wreg = &amdgpu_invalid_wreg;
2986 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2987 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2988 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2989 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2990 
2991 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2992 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2993 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2994 
2995 	/* mutex initialization is all done here so we
2996 	 * can recall functions without having locking issues */
2997 	atomic_set(&adev->irq.ih.lock, 0);
2998 	mutex_init(&adev->firmware.mutex);
2999 	mutex_init(&adev->pm.mutex);
3000 	mutex_init(&adev->gfx.gpu_clock_mutex);
3001 	mutex_init(&adev->srbm_mutex);
3002 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3003 	mutex_init(&adev->gfx.gfx_off_mutex);
3004 	mutex_init(&adev->grbm_idx_mutex);
3005 	mutex_init(&adev->mn_lock);
3006 	mutex_init(&adev->virt.vf_errors.lock);
3007 	hash_init(adev->mn_hash);
3008 	mutex_init(&adev->lock_reset);
3009 	mutex_init(&adev->psp.mutex);
3010 	mutex_init(&adev->notifier_lock);
3011 
3012 	r = amdgpu_device_check_arguments(adev);
3013 	if (r)
3014 		return r;
3015 
3016 	spin_lock_init(&adev->mmio_idx_lock);
3017 	spin_lock_init(&adev->smc_idx_lock);
3018 	spin_lock_init(&adev->pcie_idx_lock);
3019 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3020 	spin_lock_init(&adev->didt_idx_lock);
3021 	spin_lock_init(&adev->gc_cac_idx_lock);
3022 	spin_lock_init(&adev->se_cac_idx_lock);
3023 	spin_lock_init(&adev->audio_endpt_idx_lock);
3024 	spin_lock_init(&adev->mm_stats.lock);
3025 
3026 	INIT_LIST_HEAD(&adev->shadow_list);
3027 	mutex_init(&adev->shadow_list_lock);
3028 
3029 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3030 			  amdgpu_device_delayed_init_work_handler);
3031 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3032 			  amdgpu_device_delay_enable_gfx_off);
3033 
3034 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3035 
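	/* start with one GFXOFF disable request held; GFXOFF is only entered
	 * once this count drops to zero */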
3036 	adev->gfx.gfx_off_req_count = 1;
3037 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3038 
3039 	/* Registers mapping */
3040 	/* TODO: block userspace mapping of io register */
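	/* BONAIRE (CIK) and newer ASICs expose the register aperture in BAR 5,
	 * older SI ASICs use BAR 2 */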
3041 	if (adev->asic_type >= CHIP_BONAIRE) {
3042 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3043 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3044 	} else {
3045 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3046 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3047 	}
3048 
3049 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3050 	if (adev->rmmio == NULL) {
3051 		return -ENOMEM;
3052 	}
3053 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3054 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3055 
3056 	/* io port mapping */
3057 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3058 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3059 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3060 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3061 			break;
3062 		}
3063 	}
3064 	if (adev->rio_mem == NULL)
3065 		DRM_INFO("PCI I/O BAR is not found.\n");
3066 
3067 	/* enable PCIE atomic ops */
3068 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3069 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3070 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3071 	if (r) {
3072 		adev->have_atomics_support = false;
3073 		DRM_INFO("PCIE atomic ops are not supported\n");
3074 	} else {
3075 		adev->have_atomics_support = true;
3076 	}
3077 
3078 	amdgpu_device_get_pcie_info(adev);
3079 
3080 	if (amdgpu_mcbp)
3081 		DRM_INFO("MCBP is enabled\n");
3082 
3083 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3084 		adev->enable_mes = true;
3085 
3086 	/* detect hw virtualization here */
3087 	amdgpu_detect_virtualization(adev);
3088 
3089 	r = amdgpu_device_get_job_timeout_settings(adev);
3090 	if (r) {
3091 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3092 		return r;
3093 	}
3094 
3095 	/* early init functions */
3096 	r = amdgpu_device_ip_early_init(adev);
3097 	if (r)
3098 		return r;
3099 
3100 	/* doorbell bar mapping and doorbell index init*/
3101 	amdgpu_device_doorbell_init(adev);
3102 
3103 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3104 	/* this will fail for cards that aren't VGA class devices, just
3105 	 * ignore it */
3106 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3107 
3108 	if (amdgpu_device_supports_boco(ddev))
3109 		boco = true;
3110 	if (amdgpu_has_atpx() &&
3111 	    (amdgpu_is_atpx_hybrid() ||
3112 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3113 	    !pci_is_thunderbolt_attached(adev->pdev))
3114 		vga_switcheroo_register_client(adev->pdev,
3115 					       &amdgpu_switcheroo_ops, boco);
3116 	if (boco)
3117 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3118 
3119 	if (amdgpu_emu_mode == 1) {
3120 		/* post the asic on emulation mode */
3121 		emu_soc_asic_init(adev);
3122 		goto fence_driver_init;
3123 	}
3124 
3125 	/* detect if we are with an SRIOV vbios */
3126 	amdgpu_device_detect_sriov_bios(adev);
3127 
3128 	/* check if we need to reset the asic
3129 	 *  E.g., driver was not cleanly unloaded previously, etc.
3130 	 */
3131 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3132 		r = amdgpu_asic_reset(adev);
3133 		if (r) {
3134 			dev_err(adev->dev, "asic reset on init failed\n");
3135 			goto failed;
3136 		}
3137 	}
3138 
3139 	/* Post card if necessary */
3140 	if (amdgpu_device_need_post(adev)) {
3141 		if (!adev->bios) {
3142 			dev_err(adev->dev, "no vBIOS found\n");
3143 			r = -EINVAL;
3144 			goto failed;
3145 		}
3146 		DRM_INFO("GPU posting now...\n");
3147 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3148 		if (r) {
3149 			dev_err(adev->dev, "gpu post error!\n");
3150 			goto failed;
3151 		}
3152 	}
3153 
3154 	if (adev->is_atom_fw) {
3155 		/* Initialize clocks */
3156 		r = amdgpu_atomfirmware_get_clock_info(adev);
3157 		if (r) {
3158 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3159 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3160 			goto failed;
3161 		}
3162 	} else {
3163 		/* Initialize clocks */
3164 		r = amdgpu_atombios_get_clock_info(adev);
3165 		if (r) {
3166 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3167 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3168 			goto failed;
3169 		}
3170 		/* init i2c buses */
3171 		if (!amdgpu_device_has_dc_support(adev))
3172 			amdgpu_atombios_i2c_init(adev);
3173 	}
3174 
3175 fence_driver_init:
3176 	/* Fence driver */
3177 	r = amdgpu_fence_driver_init(adev);
3178 	if (r) {
3179 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3180 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3181 		goto failed;
3182 	}
3183 
3184 	/* init the mode config */
3185 	drm_mode_config_init(adev->ddev);
3186 
3187 	r = amdgpu_device_ip_init(adev);
3188 	if (r) {
3189 		/* failed in exclusive mode due to timeout */
3190 		if (amdgpu_sriov_vf(adev) &&
3191 		    !amdgpu_sriov_runtime(adev) &&
3192 		    amdgpu_virt_mmio_blocked(adev) &&
3193 		    !amdgpu_virt_wait_reset(adev)) {
3194 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3195 			/* Don't send request since VF is inactive. */
3196 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3197 			adev->virt.ops = NULL;
3198 			r = -EAGAIN;
3199 			goto failed;
3200 		}
3201 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3202 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3203 		goto failed;
3204 	}
3205 
3206 	dev_info(adev->dev,
3207 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3208 			adev->gfx.config.max_shader_engines,
3209 			adev->gfx.config.max_sh_per_se,
3210 			adev->gfx.config.max_cu_per_sh,
3211 			adev->gfx.cu_info.number);
3212 
3213 	adev->accel_working = true;
3214 
3215 	amdgpu_vm_check_compute_bug(adev);
3216 
3217 	/* Initialize the buffer migration limit. */
3218 	if (amdgpu_moverate >= 0)
3219 		max_MBps = amdgpu_moverate;
3220 	else
3221 		max_MBps = 8; /* Allow 8 MB/s. */
3222 	/* Get a log2 for easy divisions. */
3223 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3224 
3225 	amdgpu_fbdev_init(adev);
3226 
3227 	r = amdgpu_pm_sysfs_init(adev);
3228 	if (r) {
3229 		adev->pm_sysfs_en = false;
3230 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3231 	} else
3232 		adev->pm_sysfs_en = true;
3233 
3234 	r = amdgpu_ucode_sysfs_init(adev);
3235 	if (r) {
3236 		adev->ucode_sysfs_en = false;
3237 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3238 	} else
3239 		adev->ucode_sysfs_en = true;
3240 
3241 	if ((amdgpu_testing & 1)) {
3242 		if (adev->accel_working)
3243 			amdgpu_test_moves(adev);
3244 		else
3245 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3246 	}
3247 	if (amdgpu_benchmarking) {
3248 		if (adev->accel_working)
3249 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3250 		else
3251 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3252 	}
3253 
3254 	/*
3255 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3256 	 * Otherwise the mgpu fan boost feature will be skipped because the
3257 	 * gpu instance count would be too low.
3258 	 */
3259 	amdgpu_register_gpu_instance(adev);
3260 
3261 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3262 	 * explicit gating rather than handling it automatically.
3263 	 */
3264 	r = amdgpu_device_ip_late_init(adev);
3265 	if (r) {
3266 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3267 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3268 		goto failed;
3269 	}
3270 
3271 	/* must succeed. */
3272 	amdgpu_ras_resume(adev);
3273 
3274 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3275 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3276 
3277 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3278 	if (r) {
3279 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3280 		return r;
3281 	}
3282 
3283 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3284 		r = amdgpu_pmu_init(adev);
3285 	if (r)
3286 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3287 
3288 	return 0;
3289 
3290 failed:
3291 	amdgpu_vf_error_trans_all(adev);
3292 	if (boco)
3293 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3294 
3295 	return r;
3296 }
3297 
3298 /**
3299  * amdgpu_device_fini - tear down the driver
3300  *
3301  * @adev: amdgpu_device pointer
3302  *
3303  * Tear down the driver info (all asics).
3304  * Called at driver shutdown.
3305  */
3306 void amdgpu_device_fini(struct amdgpu_device *adev)
3307 {
3308 	int r;
3309 
3310 	DRM_INFO("amdgpu: finishing device.\n");
3311 	flush_delayed_work(&adev->delayed_init_work);
3312 	adev->shutdown = true;
3313 
3314 	/* make sure IB test finished before entering exclusive mode
3315 	 * to avoid preemption on IB test
3316 	 */
3317 	if (amdgpu_sriov_vf(adev))
3318 		amdgpu_virt_request_full_gpu(adev, false);
3319 
3320 	/* disable all interrupts */
3321 	amdgpu_irq_disable_all(adev);
3322 	if (adev->mode_info.mode_config_initialized){
3323 		if (!amdgpu_device_has_dc_support(adev))
3324 			drm_helper_force_disable_all(adev->ddev);
3325 		else
3326 			drm_atomic_helper_shutdown(adev->ddev);
3327 	}
3328 	amdgpu_fence_driver_fini(adev);
3329 	if (adev->pm_sysfs_en)
3330 		amdgpu_pm_sysfs_fini(adev);
3331 	amdgpu_fbdev_fini(adev);
3332 	r = amdgpu_device_ip_fini(adev);
3333 	if (adev->firmware.gpu_info_fw) {
3334 		release_firmware(adev->firmware.gpu_info_fw);
3335 		adev->firmware.gpu_info_fw = NULL;
3336 	}
3337 	adev->accel_working = false;
3338 	/* free i2c buses */
3339 	if (!amdgpu_device_has_dc_support(adev))
3340 		amdgpu_i2c_fini(adev);
3341 
3342 	if (amdgpu_emu_mode != 1)
3343 		amdgpu_atombios_fini(adev);
3344 
3345 	kfree(adev->bios);
3346 	adev->bios = NULL;
3347 	if (amdgpu_has_atpx() &&
3348 	    (amdgpu_is_atpx_hybrid() ||
3349 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3350 	    !pci_is_thunderbolt_attached(adev->pdev))
3351 		vga_switcheroo_unregister_client(adev->pdev);
3352 	if (amdgpu_device_supports_boco(adev->ddev))
3353 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3354 	vga_client_register(adev->pdev, NULL, NULL, NULL);
3355 	if (adev->rio_mem)
3356 		pci_iounmap(adev->pdev, adev->rio_mem);
3357 	adev->rio_mem = NULL;
3358 	iounmap(adev->rmmio);
3359 	adev->rmmio = NULL;
3360 	amdgpu_device_doorbell_fini(adev);
3361 
3362 	if (adev->ucode_sysfs_en)
3363 		amdgpu_ucode_sysfs_fini(adev);
3364 
3365 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3366 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3367 		amdgpu_pmu_fini(adev);
3368 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
3369 		amdgpu_discovery_fini(adev);
3370 }
3371 
3372 
3373 /*
3374  * Suspend & resume.
3375  */
3376 /**
3377  * amdgpu_device_suspend - initiate device suspend
3378  *
3379  * @dev: drm dev pointer
3380  * @fbcon: notify the fbdev of suspend
3382  *
3383  * Puts the hw in the suspend state (all asics).
3384  * Returns 0 for success or an error on failure.
3385  * Called at driver suspend.
3386  */
3387 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3388 {
3389 	struct amdgpu_device *adev;
3390 	struct drm_crtc *crtc;
3391 	struct drm_connector *connector;
3392 	struct drm_connector_list_iter iter;
3393 	int r;
3394 
3395 	if (dev == NULL || dev->dev_private == NULL) {
3396 		return -ENODEV;
3397 	}
3398 
3399 	adev = dev->dev_private;
3400 
3401 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3402 		return 0;
3403 
3404 	adev->in_suspend = true;
3405 	drm_kms_helper_poll_disable(dev);
3406 
3407 	if (fbcon)
3408 		amdgpu_fbdev_set_suspend(adev, 1);
3409 
3410 	cancel_delayed_work_sync(&adev->delayed_init_work);
3411 
3412 	if (!amdgpu_device_has_dc_support(adev)) {
3413 		/* turn off display hw */
3414 		drm_modeset_lock_all(dev);
3415 		drm_connector_list_iter_begin(dev, &iter);
3416 		drm_for_each_connector_iter(connector, &iter)
3417 			drm_helper_connector_dpms(connector,
3418 						  DRM_MODE_DPMS_OFF);
3419 		drm_connector_list_iter_end(&iter);
3420 		drm_modeset_unlock_all(dev);
3421 		/* unpin the front buffers and cursors */
3422 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3423 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3424 			struct drm_framebuffer *fb = crtc->primary->fb;
3425 			struct amdgpu_bo *robj;
3426 
3427 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3428 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3429 				r = amdgpu_bo_reserve(aobj, true);
3430 				if (r == 0) {
3431 					amdgpu_bo_unpin(aobj);
3432 					amdgpu_bo_unreserve(aobj);
3433 				}
3434 			}
3435 
3436 			if (fb == NULL || fb->obj[0] == NULL) {
3437 				continue;
3438 			}
3439 			robj = gem_to_amdgpu_bo(fb->obj[0]);
3440 			/* don't unpin kernel fb objects */
3441 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3442 				r = amdgpu_bo_reserve(robj, true);
3443 				if (r == 0) {
3444 					amdgpu_bo_unpin(robj);
3445 					amdgpu_bo_unreserve(robj);
3446 				}
3447 			}
3448 		}
3449 	}
3450 
3451 	amdgpu_ras_suspend(adev);
3452 
3453 	r = amdgpu_device_ip_suspend_phase1(adev);
3454 
3455 	amdgpu_amdkfd_suspend(adev, !fbcon);
3456 
3457 	/* evict vram memory */
3458 	amdgpu_bo_evict_vram(adev);
3459 
3460 	amdgpu_fence_driver_suspend(adev);
3461 
3462 	r = amdgpu_device_ip_suspend_phase2(adev);
3463 
3464 	/* evict remaining vram memory
3465 	 * This second call to evict vram is to evict the gart page table
3466 	 * using the CPU.
3467 	 */
3468 	amdgpu_bo_evict_vram(adev);
3469 
3470 	return 0;
3471 }
3472 
3473 /**
3474  * amdgpu_device_resume - initiate device resume
3475  *
3476  * @dev: drm dev pointer
3477  * @fbcon: notify the fbdev of resume
3479  *
3480  * Bring the hw back to operating state (all asics).
3481  * Returns 0 for success or an error on failure.
3482  * Called at driver resume.
3483  */
3484 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3485 {
3486 	struct drm_connector *connector;
3487 	struct drm_connector_list_iter iter;
3488 	struct amdgpu_device *adev = dev->dev_private;
3489 	struct drm_crtc *crtc;
3490 	int r = 0;
3491 
3492 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3493 		return 0;
3494 
3495 	/* post card */
3496 	if (amdgpu_device_need_post(adev)) {
3497 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3498 		if (r)
3499 			DRM_ERROR("amdgpu asic init failed\n");
3500 	}
3501 
3502 	r = amdgpu_device_ip_resume(adev);
3503 	if (r) {
3504 		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3505 		return r;
3506 	}
3507 	amdgpu_fence_driver_resume(adev);
3508 
3509 
3510 	r = amdgpu_device_ip_late_init(adev);
3511 	if (r)
3512 		return r;
3513 
3514 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3515 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3516 
3517 	if (!amdgpu_device_has_dc_support(adev)) {
3518 		/* pin cursors */
3519 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3520 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3521 
3522 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3523 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3524 				r = amdgpu_bo_reserve(aobj, true);
3525 				if (r == 0) {
3526 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3527 					if (r != 0)
3528 						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3529 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3530 					amdgpu_bo_unreserve(aobj);
3531 				}
3532 			}
3533 		}
3534 	}
3535 	r = amdgpu_amdkfd_resume(adev, !fbcon);
3536 	if (r)
3537 		return r;
3538 
3539 	/* Make sure IB tests flushed */
3540 	flush_delayed_work(&adev->delayed_init_work);
3541 
3542 	/* blat the mode back in */
3543 	if (fbcon) {
3544 		if (!amdgpu_device_has_dc_support(adev)) {
3545 			/* pre DCE11 */
3546 			drm_helper_resume_force_mode(dev);
3547 
3548 			/* turn on display hw */
3549 			drm_modeset_lock_all(dev);
3550 
3551 			drm_connector_list_iter_begin(dev, &iter);
3552 			drm_for_each_connector_iter(connector, &iter)
3553 				drm_helper_connector_dpms(connector,
3554 							  DRM_MODE_DPMS_ON);
3555 			drm_connector_list_iter_end(&iter);
3556 
3557 			drm_modeset_unlock_all(dev);
3558 		}
3559 		amdgpu_fbdev_set_suspend(adev, 0);
3560 	}
3561 
3562 	drm_kms_helper_poll_enable(dev);
3563 
3564 	amdgpu_ras_resume(adev);
3565 
3566 	/*
3567 	 * Most of the connector probing functions try to acquire runtime pm
3568 	 * refs to ensure that the GPU is powered on when connector polling is
3569 	 * performed. Since we're calling this from a runtime PM callback,
3570 	 * trying to acquire rpm refs will cause us to deadlock.
3571 	 *
3572 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3573 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3574 	 */
3575 #ifdef CONFIG_PM
3576 	dev->dev->power.disable_depth++;
3577 #endif
3578 	if (!amdgpu_device_has_dc_support(adev))
3579 		drm_helper_hpd_irq_event(dev);
3580 	else
3581 		drm_kms_helper_hotplug_event(dev);
3582 #ifdef CONFIG_PM
3583 	dev->dev->power.disable_depth--;
3584 #endif
3585 	adev->in_suspend = false;
3586 
3587 	return 0;
3588 }
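
/*
 * Example (illustrative): a matching resume hook, again with a hypothetical
 * wrapper name; passing fbcon = true asks the helper to notify fbdev on the
 * way back up:
 *
 *	static int example_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */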
3589 
3590 /**
3591  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3592  *
3593  * @adev: amdgpu_device pointer
3594  *
3595  * The list of all the hardware IPs that make up the asic is walked and
3596  * the check_soft_reset callbacks are run.  check_soft_reset determines
3597  * if the asic is still hung or not.
3598  * Returns true if any of the IPs are still in a hung state, false if not.
3599  */
3600 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3601 {
3602 	int i;
3603 	bool asic_hang = false;
3604 
3605 	if (amdgpu_sriov_vf(adev))
3606 		return true;
3607 
3608 	if (amdgpu_asic_need_full_reset(adev))
3609 		return true;
3610 
3611 	for (i = 0; i < adev->num_ip_blocks; i++) {
3612 		if (!adev->ip_blocks[i].status.valid)
3613 			continue;
3614 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3615 			adev->ip_blocks[i].status.hang =
3616 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3617 		if (adev->ip_blocks[i].status.hang) {
3618 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3619 			asic_hang = true;
3620 		}
3621 	}
3622 	return asic_hang;
3623 }
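
/*
 * Example (illustrative): an IP block participates in the walk above by
 * providing a check_soft_reset callback in its amd_ip_funcs. A minimal
 * sketch, where mmEXAMPLE_STATUS and EXAMPLE_HANG_MASK are hypothetical
 * register/field names:
 *
 *	static bool example_ip_check_soft_reset(void *handle)
 *	{
 *		struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 *
 *		return !!(RREG32(mmEXAMPLE_STATUS) & EXAMPLE_HANG_MASK);
 *	}
 */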
3624 
3625 /**
3626  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3627  *
3628  * @adev: amdgpu_device pointer
3629  *
3630  * The list of all the hardware IPs that make up the asic is walked and the
3631  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3632  * handles any IP specific hardware or software state changes that are
3633  * necessary for a soft reset to succeed.
3634  * Returns 0 on success, negative error code on failure.
3635  */
3636 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3637 {
3638 	int i, r = 0;
3639 
3640 	for (i = 0; i < adev->num_ip_blocks; i++) {
3641 		if (!adev->ip_blocks[i].status.valid)
3642 			continue;
3643 		if (adev->ip_blocks[i].status.hang &&
3644 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3645 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3646 			if (r)
3647 				return r;
3648 		}
3649 	}
3650 
3651 	return 0;
3652 }
3653 
3654 /**
3655  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3656  *
3657  * @adev: amdgpu_device pointer
3658  *
3659  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3660  * reset is necessary to recover.
3661  * Returns true if a full asic reset is required, false if not.
3662  */
3663 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3664 {
3665 	int i;
3666 
3667 	if (amdgpu_asic_need_full_reset(adev))
3668 		return true;
3669 
3670 	for (i = 0; i < adev->num_ip_blocks; i++) {
3671 		if (!adev->ip_blocks[i].status.valid)
3672 			continue;
		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) {
3678 			if (adev->ip_blocks[i].status.hang) {
				DRM_INFO("Some block needs a full reset!\n");
3680 				return true;
3681 			}
3682 		}
3683 	}
3684 	return false;
3685 }
3686 
3687 /**
3688  * amdgpu_device_ip_soft_reset - do a soft reset
3689  *
3690  * @adev: amdgpu_device pointer
3691  *
3692  * The list of all the hardware IPs that make up the asic is walked and the
3693  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3694  * IP specific hardware or software state changes that are necessary to soft
3695  * reset the IP.
3696  * Returns 0 on success, negative error code on failure.
3697  */
3698 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3699 {
3700 	int i, r = 0;
3701 
3702 	for (i = 0; i < adev->num_ip_blocks; i++) {
3703 		if (!adev->ip_blocks[i].status.valid)
3704 			continue;
3705 		if (adev->ip_blocks[i].status.hang &&
3706 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3707 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3708 			if (r)
3709 				return r;
3710 		}
3711 	}
3712 
3713 	return 0;
3714 }
3715 
3716 /**
3717  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3718  *
3719  * @adev: amdgpu_device pointer
3720  *
3721  * The list of all the hardware IPs that make up the asic is walked and the
3722  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3723  * handles any IP specific hardware or software state changes that are
3724  * necessary after the IP has been soft reset.
3725  * Returns 0 on success, negative error code on failure.
3726  */
3727 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3728 {
3729 	int i, r = 0;
3730 
3731 	for (i = 0; i < adev->num_ip_blocks; i++) {
3732 		if (!adev->ip_blocks[i].status.valid)
3733 			continue;
3734 		if (adev->ip_blocks[i].status.hang &&
3735 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3736 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3737 		if (r)
3738 			return r;
3739 	}
3740 
3741 	return 0;
3742 }
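
/*
 * Taken together, the helpers above implement the soft reset sequence used
 * by amdgpu_device_pre_asic_reset() further down in this file:
 *
 *	if (!need_full_reset)
 *		need_full_reset = amdgpu_device_ip_need_full_reset(adev);
 *
 *	if (!need_full_reset) {
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *		if (r || amdgpu_device_ip_check_soft_reset(adev))
 *			need_full_reset = true;
 *	}
 */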
3743 
3744 /**
3745  * amdgpu_device_recover_vram - Recover some VRAM contents
3746  *
3747  * @adev: amdgpu_device pointer
3748  *
3749  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3750  * restore things like GPUVM page tables after a GPU reset where
3751  * the contents of VRAM might be lost.
3752  *
3753  * Returns:
3754  * 0 on success, negative error code on failure.
3755  */
3756 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3757 {
3758 	struct dma_fence *fence = NULL, *next = NULL;
3759 	struct amdgpu_bo *shadow;
3760 	long r = 1, tmo;
3761 
3762 	if (amdgpu_sriov_runtime(adev))
3763 		tmo = msecs_to_jiffies(8000);
3764 	else
3765 		tmo = msecs_to_jiffies(100);
3766 
3767 	DRM_INFO("recover vram bo from shadow start\n");
3768 	mutex_lock(&adev->shadow_list_lock);
3769 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3770 
3771 		/* No need to recover an evicted BO */
3772 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3773 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3774 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3775 			continue;
3776 
3777 		r = amdgpu_bo_restore_shadow(shadow, &next);
3778 		if (r)
3779 			break;
3780 
3781 		if (fence) {
3782 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3783 			dma_fence_put(fence);
3784 			fence = next;
3785 			if (tmo == 0) {
3786 				r = -ETIMEDOUT;
3787 				break;
3788 			} else if (tmo < 0) {
3789 				r = tmo;
3790 				break;
3791 			}
3792 		} else {
3793 			fence = next;
3794 		}
3795 	}
3796 	mutex_unlock(&adev->shadow_list_lock);
3797 
3798 	if (fence)
3799 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3800 	dma_fence_put(fence);
3801 
3802 	if (r < 0 || tmo <= 0) {
3803 		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3804 		return -EIO;
3805 	}
3806 
3807 	DRM_INFO("recover vram bo from shadow done\n");
3808 	return 0;
3809 }
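
/*
 * Note: only buffers that were created with a shadow participate in the
 * recovery above. Illustrative sketch of how such a buffer is requested at
 * creation time (field values are placeholders, not a real allocation):
 *
 *	struct amdgpu_bo_param bp = {
 *		.size   = size,
 *		.domain = AMDGPU_GEM_DOMAIN_VRAM,
 *		.flags  = AMDGPU_GEM_CREATE_SHADOW,
 *	};
 *
 * The shadow copy lives in GTT, which is why amdgpu_bo_restore_shadow() can
 * rebuild the VRAM contents after a reset that may have clobbered VRAM.
 */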
3810 
3811 
3812 /**
3813  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3814  *
3815  * @adev: amdgpu device pointer
3816  * @from_hypervisor: request from hypervisor
3817  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success or a negative error code on failure.
3820  */
3821 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3822 				     bool from_hypervisor)
3823 {
3824 	int r;
3825 
3826 	if (from_hypervisor)
3827 		r = amdgpu_virt_request_full_gpu(adev, true);
3828 	else
3829 		r = amdgpu_virt_reset_gpu(adev);
3830 	if (r)
3831 		return r;
3832 
3833 	amdgpu_amdkfd_pre_reset(adev);
3834 
3835 	/* Resume IP prior to SMC */
3836 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3837 	if (r)
3838 		goto error;
3839 
3840 	amdgpu_virt_init_data_exchange(adev);
	/* we need to recover the GART prior to resuming SMC/CP/SDMA */
3842 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3843 
3844 	r = amdgpu_device_fw_loading(adev);
3845 	if (r)
3846 		return r;
3847 
3848 	/* now we are okay to resume SMC/CP/SDMA */
3849 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3850 	if (r)
3851 		goto error;
3852 
3853 	amdgpu_irq_gpu_reset_resume_helper(adev);
3854 	r = amdgpu_ib_ring_tests(adev);
3855 	amdgpu_amdkfd_post_reset(adev);
3856 
3857 error:
3858 	amdgpu_virt_release_full_gpu(adev, true);
3859 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3860 		amdgpu_inc_vram_lost(adev);
3861 		r = amdgpu_device_recover_vram(adev);
3862 	}
3863 
3864 	return r;
3865 }
3866 
3867 /**
3868  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3869  *
3870  * @adev: amdgpu device pointer
3871  *
3872  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3873  * a hung GPU.
3874  */
3875 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3876 {
3877 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3878 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3879 		return false;
3880 	}
3881 
3882 	if (amdgpu_gpu_recovery == 0)
3883 		goto disabled;
3884 
3885 	if (amdgpu_sriov_vf(adev))
3886 		return true;
3887 
3888 	if (amdgpu_gpu_recovery == -1) {
3889 		switch (adev->asic_type) {
3890 		case CHIP_BONAIRE:
3891 		case CHIP_HAWAII:
3892 		case CHIP_TOPAZ:
3893 		case CHIP_TONGA:
3894 		case CHIP_FIJI:
3895 		case CHIP_POLARIS10:
3896 		case CHIP_POLARIS11:
3897 		case CHIP_POLARIS12:
3898 		case CHIP_VEGAM:
3899 		case CHIP_VEGA20:
3900 		case CHIP_VEGA10:
3901 		case CHIP_VEGA12:
3902 		case CHIP_RAVEN:
3903 		case CHIP_ARCTURUS:
3904 		case CHIP_RENOIR:
3905 		case CHIP_NAVI10:
3906 		case CHIP_NAVI14:
3907 		case CHIP_NAVI12:
3908 			break;
3909 		default:
3910 			goto disabled;
3911 		}
3912 	}
3913 
3914 	return true;
3915 
disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
3919 }
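
/*
 * The policy above honours the amdgpu.gpu_recovery module parameter:
 * 0 disables recovery, 1 forces it on, and -1 (the default) enables it only
 * for the ASICs listed in the switch statement. For example, booting with
 * "amdgpu.gpu_recovery=1" on the kernel command line makes this function
 * return true for any ASIC that is actually hung.
 */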
3920 
3921 
3922 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3923 					struct amdgpu_job *job,
3924 					bool *need_full_reset_arg)
3925 {
3926 	int i, r = 0;
3927 	bool need_full_reset  = *need_full_reset_arg;
3928 
3929 	amdgpu_debugfs_wait_dump(adev);
3930 
3931 	/* block all schedulers and reset given job's ring */
3932 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3933 		struct amdgpu_ring *ring = adev->rings[i];
3934 
3935 		if (!ring || !ring->sched.thread)
3936 			continue;
3937 
3938 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3939 		amdgpu_fence_driver_force_completion(ring);
3940 	}
3941 
	if (job)
3943 		drm_sched_increase_karma(&job->base);
3944 
3945 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3946 	if (!amdgpu_sriov_vf(adev)) {
3947 
3948 		if (!need_full_reset)
3949 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3950 
3951 		if (!need_full_reset) {
3952 			amdgpu_device_ip_pre_soft_reset(adev);
3953 			r = amdgpu_device_ip_soft_reset(adev);
3954 			amdgpu_device_ip_post_soft_reset(adev);
3955 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3956 				DRM_INFO("soft reset failed, will fallback to full reset!\n");
3957 				need_full_reset = true;
3958 			}
3959 		}
3960 
3961 		if (need_full_reset)
3962 			r = amdgpu_device_ip_suspend(adev);
3963 
3964 		*need_full_reset_arg = need_full_reset;
3965 	}
3966 
3967 	return r;
3968 }
3969 
3970 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3971 			       struct list_head *device_list_handle,
3972 			       bool *need_full_reset_arg)
3973 {
3974 	struct amdgpu_device *tmp_adev = NULL;
3975 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3976 	int r = 0;
3977 
	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper link negotiation in FW (within 1 sec)
	 */
3982 	if (need_full_reset) {
3983 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3984 			/* For XGMI run all resets in parallel to speed up the process */
3985 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3986 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
3987 					r = -EALREADY;
3988 			} else
3989 				r = amdgpu_asic_reset(tmp_adev);
3990 
3991 			if (r) {
3992 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3993 					 r, tmp_adev->ddev->unique);
3994 				break;
3995 			}
3996 		}
3997 
3998 		/* For XGMI wait for all resets to complete before proceed */
3999 		if (!r) {
4000 			list_for_each_entry(tmp_adev, device_list_handle,
4001 					    gmc.xgmi.head) {
4002 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4003 					flush_work(&tmp_adev->xgmi_reset_work);
4004 					r = tmp_adev->asic_reset_res;
4005 					if (r)
4006 						break;
4007 				}
4008 			}
4009 		}
4010 	}
4011 
4012 	if (!r && amdgpu_ras_intr_triggered()) {
4013 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4014 			if (tmp_adev->mmhub.funcs &&
4015 			    tmp_adev->mmhub.funcs->reset_ras_error_count)
4016 				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4017 		}
4018 
4019 		amdgpu_ras_intr_cleared();
4020 	}
4021 
4022 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4023 		if (need_full_reset) {
4024 			/* post card */
4025 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4026 				DRM_WARN("asic atom init failed!");
4027 
4028 			if (!r) {
4029 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4030 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4031 				if (r)
4032 					goto out;
4033 
4034 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4035 				if (vram_lost) {
4036 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4037 					amdgpu_inc_vram_lost(tmp_adev);
4038 				}
4039 
4040 				r = amdgpu_gtt_mgr_recover(
4041 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
4042 				if (r)
4043 					goto out;
4044 
4045 				r = amdgpu_device_fw_loading(tmp_adev);
4046 				if (r)
4047 					return r;
4048 
4049 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4050 				if (r)
4051 					goto out;
4052 
4053 				if (vram_lost)
4054 					amdgpu_device_fill_reset_magic(tmp_adev);
4055 
				/*
				 * Add this ASIC back as tracked now that the
				 * reset has completed successfully.
				 */
4060 				amdgpu_register_gpu_instance(tmp_adev);
4061 
4062 				r = amdgpu_device_ip_late_init(tmp_adev);
4063 				if (r)
4064 					goto out;
4065 
4066 				amdgpu_fbdev_set_suspend(tmp_adev, 0);
4067 
4068 				/* must succeed. */
4069 				amdgpu_ras_resume(tmp_adev);
4070 
4071 				/* Update PSP FW topology after reset */
4072 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4073 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4074 			}
4075 		}
4076 
4077 
4078 out:
4079 		if (!r) {
4080 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4081 			r = amdgpu_ib_ring_tests(tmp_adev);
4082 			if (r) {
4083 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4084 				r = amdgpu_device_ip_suspend(tmp_adev);
4085 				need_full_reset = true;
4086 				r = -EAGAIN;
4087 				goto end;
4088 			}
4089 		}
4090 
4091 		if (!r)
4092 			r = amdgpu_device_recover_vram(tmp_adev);
4093 		else
4094 			tmp_adev->asic_reset_res = r;
4095 	}
4096 
4097 end:
4098 	*need_full_reset_arg = need_full_reset;
4099 	return r;
4100 }
4101 
4102 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
4103 {
4104 	if (trylock) {
4105 		if (!mutex_trylock(&adev->lock_reset))
4106 			return false;
	} else {
		mutex_lock(&adev->lock_reset);
	}
4109 
4110 	atomic_inc(&adev->gpu_reset_counter);
4111 	adev->in_gpu_reset = true;
4112 	switch (amdgpu_asic_reset_method(adev)) {
4113 	case AMD_RESET_METHOD_MODE1:
4114 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4115 		break;
4116 	case AMD_RESET_METHOD_MODE2:
4117 		adev->mp1_state = PP_MP1_STATE_RESET;
4118 		break;
4119 	default:
4120 		adev->mp1_state = PP_MP1_STATE_NONE;
4121 		break;
4122 	}
4123 
4124 	return true;
4125 }
4126 
4127 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4128 {
4129 	amdgpu_vf_error_trans_all(adev);
4130 	adev->mp1_state = PP_MP1_STATE_NONE;
4131 	adev->in_gpu_reset = false;
4132 	mutex_unlock(&adev->lock_reset);
4133 }
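
/*
 * Example (illustrative): the two helpers above bracket every per-device
 * reset, as amdgpu_device_gpu_recover() does below. Sketched usage:
 *
 *	if (!amdgpu_device_lock_adev(tmp_adev, !hive))
 *		return 0;	// another reset already owns this device
 *
 *	// ... stop schedulers, reset the ASIC, resume ...
 *
 *	amdgpu_device_unlock_adev(tmp_adev);
 *
 * Taking the lock also bumps gpu_reset_counter and selects the mp1_state
 * matching the chosen reset method.
 */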
4134 
4135 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4136 {
4137 	struct pci_dev *p = NULL;
4138 
4139 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4140 			adev->pdev->bus->number, 1);
4141 	if (p) {
4142 		pm_runtime_enable(&(p->dev));
4143 		pm_runtime_resume(&(p->dev));
4144 	}
4145 }
4146 
4147 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4148 {
4149 	enum amd_reset_method reset_method;
4150 	struct pci_dev *p = NULL;
4151 	u64 expires;
4152 
	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue if the audio device is not
	 * properly suspended first.
	 */
4157 	reset_method = amdgpu_asic_reset_method(adev);
4158 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
4159 	     (reset_method != AMD_RESET_METHOD_MODE1))
4160 		return -EINVAL;
4161 
4162 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4163 			adev->pdev->bus->number, 1);
4164 	if (!p)
4165 		return -ENODEV;
4166 
4167 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
4168 	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Since 3S is the
		 * audio controller's default autosuspend delay setting,
		 * the 4S used here is guaranteed to cover that.
		 */
4175 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4176 
4177 	while (!pm_runtime_status_suspended(&(p->dev))) {
4178 		if (!pm_runtime_suspend(&(p->dev)))
4179 			break;
4180 
4181 		if (expires < ktime_get_mono_fast_ns()) {
4182 			dev_warn(adev->dev, "failed to suspend display audio\n");
4183 			/* TODO: abort the succeeding gpu reset? */
4184 			return -ETIMEDOUT;
4185 		}
4186 	}
4187 
4188 	pm_runtime_disable(&(p->dev));
4189 
4190 	return 0;
4191 }
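
/*
 * PCI function 1 on the GPU's bus/slot is typically the HDMI/DP audio
 * controller, hence the pci_get_domain_bus_and_slot(..., 1) lookups in the
 * two helpers above. They are used as a pair around a reset by
 * amdgpu_device_gpu_recover() below, roughly:
 *
 *	if (!amdgpu_device_suspend_display_audio(tmp_adev))
 *		audio_suspended = true;
 *	// ... perform the reset ...
 *	if (audio_suspended)
 *		amdgpu_device_resume_display_audio(tmp_adev);
 */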
4192 
4193 /**
4194  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4195  *
4196  * @adev: amdgpu device pointer
 * @job: the job which triggered the hang, if any
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempts a soft reset or a full reset and reinitializes the ASIC.
 * Returns 0 for success or an error on failure.
 */
4204 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4205 			      struct amdgpu_job *job)
4206 {
4207 	struct list_head device_list, *device_list_handle =  NULL;
4208 	bool need_full_reset = false;
4209 	bool job_signaled = false;
4210 	struct amdgpu_hive_info *hive = NULL;
4211 	struct amdgpu_device *tmp_adev = NULL;
4212 	int i, r = 0;
4213 	bool in_ras_intr = amdgpu_ras_intr_triggered();
	bool use_baco =
		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);
4217 	bool audio_suspended = false;
4218 
	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read the log and see why the system rebooted.
	 */
4223 	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4224 
4225 		DRM_WARN("Emergency reboot.");
4226 
4227 		ksys_sync_helper();
4228 		emergency_restart();
4229 	}
4230 
4231 	dev_info(adev->dev, "GPU %s begin!\n",
4232 		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
4233 
	/*
	 * Here we trylock to avoid a chain of resets executing from
	 * either a trigger by jobs on different adevs in an XGMI hive
	 * or by jobs on different schedulers for the same device while
	 * this TO handler is running.
	 * We always reset all schedulers for a device and all devices in
	 * an XGMI hive, so that should take care of them too.
	 */
4241 	hive = amdgpu_get_xgmi_hive(adev, true);
4242 	if (hive && !mutex_trylock(&hive->reset_lock)) {
4243 		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4244 			  job ? job->base.id : -1, hive->hive_id);
4245 		mutex_unlock(&hive->hive_lock);
4246 		return 0;
4247 	}
4248 
4249 	/*
4250 	 * Build list of devices to reset.
4251 	 * In case we are in XGMI hive mode, resort the device list
4252 	 * to put adev in the 1st position.
4253 	 */
4254 	INIT_LIST_HEAD(&device_list);
4255 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4256 		if (!hive)
4257 			return -ENODEV;
4258 		if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4259 			list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4260 		device_list_handle = &hive->device_list;
4261 	} else {
4262 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
4263 		device_list_handle = &device_list;
4264 	}
4265 
4266 	/* block all schedulers and reset given job's ring */
4267 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4268 		if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4269 			DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4270 				  job ? job->base.id : -1);
			if (hive)
				mutex_unlock(&hive->hive_lock);
4272 			return 0;
4273 		}
4274 
		/*
		 * Try to put the audio codec into suspend state
		 * before the gpu reset is started.
		 *
		 * The power domain of the graphics device is shared
		 * with the AZ power domain. Without this, we may end
		 * up changing the audio hardware behind the audio
		 * driver's back, which triggers audio codec errors.
		 */
4285 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
4286 			audio_suspended = true;
4287 
4288 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
4289 
4290 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4291 
4292 		if (!amdgpu_sriov_vf(tmp_adev))
4293 			amdgpu_amdkfd_pre_reset(tmp_adev);
4294 
		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset has completed.
		 */
4299 		amdgpu_unregister_gpu_instance(tmp_adev);
4300 
4301 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
4302 
4303 		/* disable ras on ALL IPs */
4304 		if (!(in_ras_intr && !use_baco) &&
4305 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4306 			amdgpu_ras_suspend(tmp_adev);
4307 
4308 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4309 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4310 
4311 			if (!ring || !ring->sched.thread)
4312 				continue;
4313 
4314 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4315 
4316 			if (in_ras_intr && !use_baco)
4317 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4318 		}
4319 	}
4320 
4321 	if (in_ras_intr && !use_baco)
4322 		goto skip_sched_resume;
4323 
4324 	/*
4325 	 * Must check guilty signal here since after this point all old
4326 	 * HW fences are force signaled.
4327 	 *
4328 	 * job->base holds a reference to parent fence
4329 	 */
4330 	if (job && job->base.s_fence->parent &&
4331 	    dma_fence_is_signaled(job->base.s_fence->parent)) {
4332 		job_signaled = true;
4333 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4334 		goto skip_hw_reset;
4335 	}
4336 
4337 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4338 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4339 		r = amdgpu_device_pre_asic_reset(tmp_adev,
4340 						 NULL,
4341 						 &need_full_reset);
		/* TODO: Should we stop? */
4343 		if (r) {
4344 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4345 				  r, tmp_adev->ddev->unique);
4346 			tmp_adev->asic_reset_res = r;
4347 		}
4348 	}
4349 
4350 	/* Actual ASIC resets if needed.*/
4351 	/* TODO Implement XGMI hive reset logic for SRIOV */
4352 	if (amdgpu_sriov_vf(adev)) {
4353 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4354 		if (r)
4355 			adev->asic_reset_res = r;
4356 	} else {
4357 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r == -EAGAIN)
4359 			goto retry;
4360 	}
4361 
4362 skip_hw_reset:
4363 
	/* Post ASIC reset for all devs. */
4365 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4366 
4367 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4368 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4369 
4370 			if (!ring || !ring->sched.thread)
4371 				continue;
4372 
			/* No point in resubmitting jobs if we didn't HW reset */
4374 			if (!tmp_adev->asic_reset_res && !job_signaled)
4375 				drm_sched_resubmit_jobs(&ring->sched);
4376 
4377 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4378 		}
4379 
		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled)
			drm_helper_resume_force_mode(tmp_adev->ddev);
4383 
4384 		tmp_adev->asic_reset_res = 0;
4385 
4386 		if (r) {
4387 			/* bad news, how to tell it to userspace ? */
4388 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4389 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4390 		} else {
4391 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4392 		}
4393 	}
4394 
4395 skip_sched_resume:
4396 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* unlock kfd: SRIOV would do it separately */
		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
4400 		if (audio_suspended)
4401 			amdgpu_device_resume_display_audio(tmp_adev);
4402 		amdgpu_device_unlock_adev(tmp_adev);
4403 	}
4404 
4405 	if (hive) {
4406 		mutex_unlock(&hive->reset_lock);
4407 		mutex_unlock(&hive->hive_lock);
4408 	}
4409 
4410 	if (r)
4411 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4412 	return r;
4413 }
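
/*
 * Example (illustrative): recovery is normally kicked off from the job
 * timeout handler in amdgpu_job.c, gated on the policy helper above; a
 * rough sketch of such a caller:
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *	else
 *		drm_sched_suspend_timeout(&ring->sched);
 */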
4414 
4415 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4417  *
4418  * @adev: amdgpu_device pointer
4419  *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
4421  * and lanes) of the slot the device is in. Handles APUs and
4422  * virtualized environments where PCIE config space may not be available.
4423  */
4424 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4425 {
4426 	struct pci_dev *pdev;
4427 	enum pci_bus_speed speed_cap, platform_speed_cap;
4428 	enum pcie_link_width platform_link_width;
4429 
4430 	if (amdgpu_pcie_gen_cap)
4431 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4432 
4433 	if (amdgpu_pcie_lane_cap)
4434 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4435 
4436 	/* covers APUs as well */
4437 	if (pci_is_root_bus(adev->pdev->bus)) {
4438 		if (adev->pm.pcie_gen_mask == 0)
4439 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4440 		if (adev->pm.pcie_mlw_mask == 0)
4441 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4442 		return;
4443 	}
4444 
4445 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4446 		return;
4447 
4448 	pcie_bandwidth_available(adev->pdev, NULL,
4449 				 &platform_speed_cap, &platform_link_width);
4450 
4451 	if (adev->pm.pcie_gen_mask == 0) {
4452 		/* asic caps */
4453 		pdev = adev->pdev;
4454 		speed_cap = pcie_get_speed_cap(pdev);
4455 		if (speed_cap == PCI_SPEED_UNKNOWN) {
4456 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4457 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4458 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4459 		} else {
4460 			if (speed_cap == PCIE_SPEED_16_0GT)
4461 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4462 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4463 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4464 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4465 			else if (speed_cap == PCIE_SPEED_8_0GT)
4466 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4467 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4468 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4469 			else if (speed_cap == PCIE_SPEED_5_0GT)
4470 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4471 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4472 			else
4473 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4474 		}
4475 		/* platform caps */
4476 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4477 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4478 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4479 		} else {
4480 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4481 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4482 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4483 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4484 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4485 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4486 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4487 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4488 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4489 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4490 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4491 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4492 			else
4493 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4494 
4495 		}
4496 	}
4497 	if (adev->pm.pcie_mlw_mask == 0) {
4498 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4499 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4500 		} else {
4501 			switch (platform_link_width) {
4502 			case PCIE_LNK_X32:
4503 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4504 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4505 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4506 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4507 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4508 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4509 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4510 				break;
4511 			case PCIE_LNK_X16:
4512 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4513 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4514 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4515 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4516 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4517 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4518 				break;
4519 			case PCIE_LNK_X12:
4520 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4521 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4522 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4523 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4524 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4525 				break;
4526 			case PCIE_LNK_X8:
4527 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4528 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4529 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4530 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4531 				break;
4532 			case PCIE_LNK_X4:
4533 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4534 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4535 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4536 				break;
4537 			case PCIE_LNK_X2:
4538 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4539 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4540 				break;
4541 			case PCIE_LNK_X1:
4542 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4543 				break;
4544 			default:
4545 				break;
4546 			}
4547 		}
4548 	}
4549 }
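
/*
 * The resulting masks use the CAIL_* bit definitions from amd_pcie.h and
 * can be overridden via the amdgpu.pcie_gen_cap / amdgpu.pcie_lane_cap
 * module parameters, as checked at the top of this function. Consumers
 * simply test individual capability bits, e.g.:
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *		// the platform link supports gen3 speeds
 */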
4550 
4551 int amdgpu_device_baco_enter(struct drm_device *dev)
4552 {
4553 	struct amdgpu_device *adev = dev->dev_private;
4554 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4555 
4556 	if (!amdgpu_device_supports_baco(adev->ddev))
4557 		return -ENOTSUPP;
4558 
4559 	if (ras && ras->supported)
4560 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4561 
4562 	return amdgpu_dpm_baco_enter(adev);
4563 }
4564 
4565 int amdgpu_device_baco_exit(struct drm_device *dev)
4566 {
4567 	struct amdgpu_device *adev = dev->dev_private;
4568 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4569 	int ret = 0;
4570 
4571 	if (!amdgpu_device_supports_baco(adev->ddev))
4572 		return -ENOTSUPP;
4573 
4574 	ret = amdgpu_dpm_baco_exit(adev);
4575 	if (ret)
4576 		return ret;
4577 
4578 	if (ras && ras->supported)
4579 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4580 
4581 	return 0;
4582 }
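
/*
 * Example (illustrative): BACO entry and exit are used as a pair by the
 * runtime-PM paths that power the GPU down and back up; a rough sketch:
 *
 *	// runtime suspend
 *	r = amdgpu_device_baco_enter(drm_dev);
 *	...
 *	// runtime resume
 *	r = amdgpu_device_baco_exit(drm_dev);
 *
 * When RAS is supported, doorbell interrupts are switched off before
 * entering BACO and re-enabled after leaving it, as the two functions
 * above do.
 */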
4583