1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/console.h>
31 #include <linux/slab.h>
32 #include <drm/drmP.h>
33 #include <drm/drm_atomic_helper.h>
34 #include <drm/drm_probe_helper.h>
35 #include <drm/amdgpu_drm.h>
36 #include <linux/vgaarb.h>
37 #include <linux/vga_switcheroo.h>
38 #include <linux/efi.h>
39 #include "amdgpu.h"
40 #include "amdgpu_trace.h"
41 #include "amdgpu_i2c.h"
42 #include "atom.h"
43 #include "amdgpu_atombios.h"
44 #include "amdgpu_atomfirmware.h"
45 #include "amd_pcie.h"
46 #ifdef CONFIG_DRM_AMDGPU_SI
47 #include "si.h"
48 #endif
49 #ifdef CONFIG_DRM_AMDGPU_CIK
50 #include "cik.h"
51 #endif
52 #include "vi.h"
53 #include "soc15.h"
54 #include "bif/bif_4_1_d.h"
55 #include <linux/pci.h>
56 #include <linux/firmware.h>
57 #include "amdgpu_vf_error.h"
58 
59 #include "amdgpu_amdkfd.h"
60 #include "amdgpu_pm.h"
61 
62 #include "amdgpu_xgmi.h"
63 #include "amdgpu_ras.h"
64 #include "amdgpu_pmu.h"
65 
66 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
67 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
68 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
69 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
70 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
71 
72 #define AMDGPU_RESUME_MS		2000
73 
74 static const char *amdgpu_asic_name[] = {
75 	"TAHITI",
76 	"PITCAIRN",
77 	"VERDE",
78 	"OLAND",
79 	"HAINAN",
80 	"BONAIRE",
81 	"KAVERI",
82 	"KABINI",
83 	"HAWAII",
84 	"MULLINS",
85 	"TOPAZ",
86 	"TONGA",
87 	"FIJI",
88 	"CARRIZO",
89 	"STONEY",
90 	"POLARIS10",
91 	"POLARIS11",
92 	"POLARIS12",
93 	"VEGAM",
94 	"VEGA10",
95 	"VEGA12",
96 	"VEGA20",
97 	"RAVEN",
98 	"LAST",
99 };
100 
101 /**
102  * DOC: pcie_replay_count
103  *
104  * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
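 *
 * A typical way to read it from userspace (the exact sysfs path may vary by
 * system) is:
 *   cat /sys/class/drm/card0/device/pcie_replay_count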
108  */
109 
110 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
111 		struct device_attribute *attr, char *buf)
112 {
113 	struct drm_device *ddev = dev_get_drvdata(dev);
114 	struct amdgpu_device *adev = ddev->dev_private;
115 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
116 
117 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
118 }
119 
120 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
121 		amdgpu_device_get_pcie_replay_count, NULL);
122 
123 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
124 
125 /**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
127  *
128  * @dev: drm_device pointer
129  *
130  * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
132  */
133 bool amdgpu_device_is_px(struct drm_device *dev)
134 {
135 	struct amdgpu_device *adev = dev->dev_private;
136 
137 	if (adev->flags & AMD_IS_PX)
138 		return true;
139 	return false;
140 }
141 
142 /*
143  * MMIO register access helper functions.
144  */
145 /**
146  * amdgpu_mm_rreg - read a memory mapped IO register
147  *
148  * @adev: amdgpu_device pointer
149  * @reg: dword aligned register offset
150  * @acc_flags: access flags which require special behavior
151  *
152  * Returns the 32 bit value from the offset specified.
153  */
154 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
155 			uint32_t acc_flags)
156 {
157 	uint32_t ret;
158 
159 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
160 		return amdgpu_virt_kiq_rreg(adev, reg);
161 
162 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
163 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
164 	else {
165 		unsigned long flags;
166 
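		/* Registers beyond the directly mapped MMIO aperture (or
		 * index-mode accesses) go through the MM_INDEX/MM_DATA
		 * indirect register pair, serialized by mmio_idx_lock.
		 */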
167 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
168 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
169 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
170 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
171 	}
172 	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
173 	return ret;
174 }
175 
/*
 * MMIO register byte read helper function
 * @offset: byte offset from MMIO start
 */
181 
182 /**
183  * amdgpu_mm_rreg8 - read a memory mapped IO register
184  *
185  * @adev: amdgpu_device pointer
186  * @offset: byte aligned register offset
187  *
188  * Returns the 8 bit value from the offset specified.
189  */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
191 	if (offset < adev->rmmio_size)
192 		return (readb(adev->rmmio + offset));
193 	BUG();
194 }
195 
/*
 * MMIO register byte write helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
202 /**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
204  *
205  * @adev: amdgpu_device pointer
206  * @offset: byte aligned register offset
207  * @value: 8 bit value to write
208  *
209  * Writes the value specified to the offset specified.
210  */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
212 	if (offset < adev->rmmio_size)
213 		writeb(value, adev->rmmio + offset);
214 	else
215 		BUG();
216 }
217 
218 /**
219  * amdgpu_mm_wreg - write to a memory mapped IO register
220  *
221  * @adev: amdgpu_device pointer
222  * @reg: dword aligned register offset
223  * @v: 32 bit value to write to the register
224  * @acc_flags: access flags which require special behavior
225  *
226  * Writes the value specified to the offset specified.
227  */
228 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
229 		    uint32_t acc_flags)
230 {
231 	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
232 
233 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
234 		adev->last_mm_index = v;
235 	}
236 
237 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
238 		return amdgpu_virt_kiq_wreg(adev, reg, v);
239 
240 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
241 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
242 	else {
243 		unsigned long flags;
244 
245 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
246 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
247 		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
248 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
249 	}
250 
251 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
252 		udelay(500);
253 	}
254 }
255 
256 /**
257  * amdgpu_io_rreg - read an IO register
258  *
259  * @adev: amdgpu_device pointer
260  * @reg: dword aligned register offset
261  *
262  * Returns the 32 bit value from the offset specified.
263  */
264 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
265 {
266 	if ((reg * 4) < adev->rio_mem_size)
267 		return ioread32(adev->rio_mem + (reg * 4));
268 	else {
269 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
270 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
271 	}
272 }
273 
274 /**
275  * amdgpu_io_wreg - write to an IO register
276  *
277  * @adev: amdgpu_device pointer
278  * @reg: dword aligned register offset
279  * @v: 32 bit value to write to the register
280  *
281  * Writes the value specified to the offset specified.
282  */
283 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
284 {
285 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
286 		adev->last_mm_index = v;
287 	}
288 
289 	if ((reg * 4) < adev->rio_mem_size)
290 		iowrite32(v, adev->rio_mem + (reg * 4));
291 	else {
292 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
293 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
294 	}
295 
296 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
297 		udelay(500);
298 	}
299 }
300 
301 /**
302  * amdgpu_mm_rdoorbell - read a doorbell dword
303  *
304  * @adev: amdgpu_device pointer
305  * @index: doorbell index
306  *
307  * Returns the value in the doorbell aperture at the
308  * requested doorbell index (CIK).
309  */
310 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
311 {
312 	if (index < adev->doorbell.num_doorbells) {
313 		return readl(adev->doorbell.ptr + index);
314 	} else {
315 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
316 		return 0;
317 	}
318 }
319 
320 /**
321  * amdgpu_mm_wdoorbell - write a doorbell dword
322  *
323  * @adev: amdgpu_device pointer
324  * @index: doorbell index
325  * @v: value to write
326  *
327  * Writes @v to the doorbell aperture at the
328  * requested doorbell index (CIK).
329  */
330 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
331 {
332 	if (index < adev->doorbell.num_doorbells) {
333 		writel(v, adev->doorbell.ptr + index);
334 	} else {
335 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
336 	}
337 }
338 
339 /**
340  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
341  *
342  * @adev: amdgpu_device pointer
343  * @index: doorbell index
344  *
345  * Returns the value in the doorbell aperture at the
346  * requested doorbell index (VEGA10+).
347  */
348 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
349 {
350 	if (index < adev->doorbell.num_doorbells) {
351 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
352 	} else {
353 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
354 		return 0;
355 	}
356 }
357 
358 /**
359  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
360  *
361  * @adev: amdgpu_device pointer
362  * @index: doorbell index
363  * @v: value to write
364  *
365  * Writes @v to the doorbell aperture at the
366  * requested doorbell index (VEGA10+).
367  */
368 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
369 {
370 	if (index < adev->doorbell.num_doorbells) {
371 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
372 	} else {
373 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
374 	}
375 }
376 
377 /**
378  * amdgpu_invalid_rreg - dummy reg read function
379  *
380  * @adev: amdgpu device pointer
381  * @reg: offset of register
382  *
383  * Dummy register read function.  Used for register blocks
384  * that certain asics don't have (all asics).
385  * Returns the value in the register.
386  */
387 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
388 {
389 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
390 	BUG();
391 	return 0;
392 }
393 
394 /**
395  * amdgpu_invalid_wreg - dummy reg write function
396  *
397  * @adev: amdgpu device pointer
398  * @reg: offset of register
399  * @v: value to write to the register
400  *
 * Dummy register write function.  Used for register blocks
402  * that certain asics don't have (all asics).
403  */
404 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
405 {
406 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
407 		  reg, v);
408 	BUG();
409 }
410 
411 /**
412  * amdgpu_block_invalid_rreg - dummy reg read function
413  *
414  * @adev: amdgpu device pointer
415  * @block: offset of instance
416  * @reg: offset of register
417  *
418  * Dummy register read function.  Used for register blocks
419  * that certain asics don't have (all asics).
420  * Returns the value in the register.
421  */
422 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
423 					  uint32_t block, uint32_t reg)
424 {
425 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
426 		  reg, block);
427 	BUG();
428 	return 0;
429 }
430 
431 /**
432  * amdgpu_block_invalid_wreg - dummy reg write function
433  *
434  * @adev: amdgpu device pointer
435  * @block: offset of instance
436  * @reg: offset of register
437  * @v: value to write to the register
438  *
 * Dummy register write function.  Used for register blocks
440  * that certain asics don't have (all asics).
441  */
442 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
443 				      uint32_t block,
444 				      uint32_t reg, uint32_t v)
445 {
446 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
447 		  reg, block, v);
448 	BUG();
449 }
450 
451 /**
452  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
453  *
454  * @adev: amdgpu device pointer
455  *
456  * Allocates a scratch page of VRAM for use by various things in the
457  * driver.
458  */
459 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
460 {
461 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
462 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
463 				       &adev->vram_scratch.robj,
464 				       &adev->vram_scratch.gpu_addr,
465 				       (void **)&adev->vram_scratch.ptr);
466 }
467 
468 /**
469  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
470  *
471  * @adev: amdgpu device pointer
472  *
473  * Frees the VRAM scratch page.
474  */
475 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
476 {
477 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
478 }
479 
480 /**
481  * amdgpu_device_program_register_sequence - program an array of registers.
482  *
483  * @adev: amdgpu_device pointer
484  * @registers: pointer to the register array
485  * @array_size: size of the register array
486  *
 * Programs an array of registers with AND and OR masks.
488  * This is a helper for setting golden registers.
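 *
 * The array is expected to contain {offset, and_mask, or_mask} triplets.  A
 * minimal usage sketch (illustrative register name and values only):
 *
 *   static const u32 golden_settings[] = {
 *           mmFOO_CTRL, 0xffffff00, 0x00000012,
 *   };
 *   amdgpu_device_program_register_sequence(adev, golden_settings,
 *                                           ARRAY_SIZE(golden_settings));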
489  */
490 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
491 					     const u32 *registers,
492 					     const u32 array_size)
493 {
494 	u32 tmp, reg, and_mask, or_mask;
495 	int i;
496 
497 	if (array_size % 3)
498 		return;
499 
	for (i = 0; i < array_size; i += 3) {
501 		reg = registers[i + 0];
502 		and_mask = registers[i + 1];
503 		or_mask = registers[i + 2];
504 
505 		if (and_mask == 0xffffffff) {
506 			tmp = or_mask;
507 		} else {
508 			tmp = RREG32(reg);
509 			tmp &= ~and_mask;
510 			tmp |= or_mask;
511 		}
512 		WREG32(reg, tmp);
513 	}
514 }
515 
516 /**
517  * amdgpu_device_pci_config_reset - reset the GPU
518  *
519  * @adev: amdgpu_device pointer
520  *
521  * Resets the GPU using the pci config reset sequence.
522  * Only applicable to asics prior to vega10.
523  */
524 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
525 {
526 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
527 }
528 
529 /*
 * GPU doorbell aperture helper functions.
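 *
 * A doorbell is a write into a dedicated MMIO aperture that notifies the GPU
 * that new work has been submitted to a ring or queue.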
531  */
532 /**
533  * amdgpu_device_doorbell_init - Init doorbell driver information.
534  *
535  * @adev: amdgpu_device pointer
536  *
537  * Init doorbell driver information (CIK)
538  * Returns 0 on success, error on failure.
539  */
540 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
541 {
542 
543 	/* No doorbell on SI hardware generation */
544 	if (adev->asic_type < CHIP_BONAIRE) {
545 		adev->doorbell.base = 0;
546 		adev->doorbell.size = 0;
547 		adev->doorbell.num_doorbells = 0;
548 		adev->doorbell.ptr = NULL;
549 		return 0;
550 	}
551 
552 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
553 		return -EINVAL;
554 
555 	amdgpu_asic_init_doorbell_index(adev);
556 
557 	/* doorbell bar mapping */
558 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
559 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
560 
561 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
562 					     adev->doorbell_index.max_assignment+1);
563 	if (adev->doorbell.num_doorbells == 0)
564 		return -EINVAL;
565 
	/* For Vega, reserve and map two pages on the doorbell BAR since the
	 * SDMA paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells must be increased by one page (0x400 dwords).
	 */
572 	if (adev->asic_type >= CHIP_VEGA10)
573 		adev->doorbell.num_doorbells += 0x400;
574 
575 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
576 				     adev->doorbell.num_doorbells *
577 				     sizeof(u32));
578 	if (adev->doorbell.ptr == NULL)
579 		return -ENOMEM;
580 
581 	return 0;
582 }
583 
584 /**
585  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
586  *
587  * @adev: amdgpu_device pointer
588  *
589  * Tear down doorbell driver information (CIK)
590  */
591 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
592 {
593 	iounmap(adev->doorbell.ptr);
594 	adev->doorbell.ptr = NULL;
595 }
596 
597 
598 
599 /*
600  * amdgpu_device_wb_*()
601  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
603  */
604 
605 /**
606  * amdgpu_device_wb_fini - Disable Writeback and free memory
607  *
608  * @adev: amdgpu_device pointer
609  *
610  * Disables Writeback and frees the Writeback memory (all asics).
611  * Used at driver shutdown.
612  */
613 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
614 {
615 	if (adev->wb.wb_obj) {
616 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
617 				      &adev->wb.gpu_addr,
618 				      (void **)&adev->wb.wb);
619 		adev->wb.wb_obj = NULL;
620 	}
621 }
622 
623 /**
624  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
625  *
626  * @adev: amdgpu_device pointer
627  *
628  * Initializes writeback and allocates writeback memory (all asics).
629  * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
631  */
632 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
633 {
634 	int r;
635 
636 	if (adev->wb.wb_obj == NULL) {
637 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
638 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
639 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
640 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
641 					    (void **)&adev->wb.wb);
642 		if (r) {
643 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
644 			return r;
645 		}
646 
647 		adev->wb.num_wb = AMDGPU_MAX_WB;
648 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
649 
650 		/* clear wb memory */
651 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
652 	}
653 
654 	return 0;
655 }
656 
657 /**
658  * amdgpu_device_wb_get - Allocate a wb entry
659  *
660  * @adev: amdgpu_device pointer
661  * @wb: wb index
662  *
663  * Allocate a wb slot for use by the driver (all asics).
664  * Returns 0 on success or -EINVAL on failure.
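 *
 * Each writeback slot is 256 bits (8 dwords) wide, so the index returned in
 * @wb is a dword offset into the writeback buffer (slot index << 3).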
665  */
666 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
667 {
668 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
669 
670 	if (offset < adev->wb.num_wb) {
671 		__set_bit(offset, adev->wb.used);
672 		*wb = offset << 3; /* convert to dw offset */
673 		return 0;
674 	} else {
675 		return -EINVAL;
676 	}
677 }
678 
679 /**
680  * amdgpu_device_wb_free - Free a wb entry
681  *
682  * @adev: amdgpu_device pointer
683  * @wb: wb index
684  *
685  * Free a wb slot allocated for use by the driver (all asics)
686  */
687 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
688 {
689 	wb >>= 3;
690 	if (wb < adev->wb.num_wb)
691 		__clear_bit(wb, adev->wb.used);
692 }
693 
694 /**
695  * amdgpu_device_resize_fb_bar - try to resize FB BAR
696  *
697  * @adev: amdgpu_device pointer
698  *
699  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
701  * driver loading by returning -ENODEV.
702  */
703 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
704 {
705 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
706 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
707 	struct pci_bus *root;
708 	struct resource *res;
709 	unsigned i;
710 	u16 cmd;
711 	int r;
712 
713 	/* Bypass for VF */
714 	if (amdgpu_sriov_vf(adev))
715 		return 0;
716 
717 	/* Check if the root BUS has 64bit memory resources */
718 	root = adev->pdev->bus;
719 	while (root->parent)
720 		root = root->parent;
721 
722 	pci_bus_for_each_resource(root, res, i) {
723 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
724 		    res->start > 0x100000000ull)
725 			break;
726 	}
727 
728 	/* Trying to resize is pointless without a root hub window above 4GB */
729 	if (!res)
730 		return 0;
731 
732 	/* Disable memory decoding while we change the BAR addresses and size */
733 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
734 	pci_write_config_word(adev->pdev, PCI_COMMAND,
735 			      cmd & ~PCI_COMMAND_MEMORY);
736 
737 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
738 	amdgpu_device_doorbell_fini(adev);
739 	if (adev->asic_type >= CHIP_BONAIRE)
740 		pci_release_resource(adev->pdev, 2);
741 
742 	pci_release_resource(adev->pdev, 0);
743 
744 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
745 	if (r == -ENOSPC)
746 		DRM_INFO("Not enough PCI address space for a large BAR.");
747 	else if (r && r != -ENOTSUPP)
748 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
749 
750 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
751 
752 	/* When the doorbell or fb BAR isn't available we have no chance of
753 	 * using the device.
754 	 */
755 	r = amdgpu_device_doorbell_init(adev);
756 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
757 		return -ENODEV;
758 
759 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
760 
761 	return 0;
762 }
763 
764 /*
765  * GPU helpers function.
766  */
767 /**
 * amdgpu_device_need_post - check if the hw needs to be posted or not
769  *
770  * @adev: amdgpu_device pointer
771  *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if a post is needed after a hw reset has been performed.
 * Returns true if a post is needed, false if not.
775  */
776 bool amdgpu_device_need_post(struct amdgpu_device *adev)
777 {
778 	uint32_t reg;
779 
780 	if (amdgpu_sriov_vf(adev))
781 		return false;
782 
783 	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so force vPost for SMC versions below 22.15.
		 */
789 		if (adev->asic_type == CHIP_FIJI) {
790 			int err;
791 			uint32_t fw_ver;
792 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
794 			if (err)
795 				return true;
796 
797 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
798 			if (fw_ver < 0x00160e00)
799 				return true;
800 		}
801 	}
802 
803 	if (adev->has_hw_reset) {
804 		adev->has_hw_reset = false;
805 		return true;
806 	}
807 
808 	/* bios scratch used on CIK+ */
809 	if (adev->asic_type >= CHIP_BONAIRE)
810 		return amdgpu_atombios_scratch_need_asic_init(adev);
811 
812 	/* check MEM_SIZE for older asics */
813 	reg = amdgpu_asic_get_config_memsize(adev);
814 
815 	if ((reg != 0) && (reg != 0xffffffff))
816 		return false;
817 
818 	return true;
819 }
820 
821 /* if we get transitioned to only one device, take VGA back */
822 /**
823  * amdgpu_device_vga_set_decode - enable/disable vga decode
824  *
825  * @cookie: amdgpu_device pointer
826  * @state: enable/disable vga decode
827  *
828  * Enable/disable vga decode (all asics).
829  * Returns VGA resource flags.
830  */
831 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
832 {
833 	struct amdgpu_device *adev = cookie;
834 	amdgpu_asic_set_vga_state(adev, state);
835 	if (state)
836 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
837 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
838 	else
839 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
840 }
841 
842 /**
843  * amdgpu_device_check_block_size - validate the vm block size
844  *
845  * @adev: amdgpu_device pointer
846  *
847  * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits handled by the page table
 * versus the page directory.  A page is 4KB, so we have a 12-bit offset, a
 * minimum of 9 bits in the page table, and the remaining bits in the page
 * directory.
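 * For example, the minimum block size of 9 bits gives 512 PTEs per page
 * table, i.e. 2MB of address space covered by each page table.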
851  */
852 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
853 {
854 	/* defines number of bits in page table versus page directory,
855 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
856 	 * page table and the remaining bits are in the page directory */
857 	if (amdgpu_vm_block_size == -1)
858 		return;
859 
860 	if (amdgpu_vm_block_size < 9) {
861 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
862 			 amdgpu_vm_block_size);
863 		amdgpu_vm_block_size = -1;
864 	}
865 }
866 
867 /**
868  * amdgpu_device_check_vm_size - validate the vm size
869  *
870  * @adev: amdgpu_device pointer
871  *
872  * Validates the vm size in GB specified via module parameter.
873  * The VM size is the size of the GPU virtual memory space in GB.
874  */
875 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
876 {
877 	/* no need to check the default value */
878 	if (amdgpu_vm_size == -1)
879 		return;
880 
881 	if (amdgpu_vm_size < 1) {
882 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
883 			 amdgpu_vm_size);
884 		amdgpu_vm_size = -1;
885 	}
886 }
887 
888 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
889 {
890 	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
892 	uint64_t total_memory;
893 	uint64_t dram_size_seven_GB = 0x1B8000000;
894 	uint64_t dram_size_three_GB = 0xB8000000;
895 
896 	if (amdgpu_smu_memory_pool_size == 0)
897 		return;
898 
899 	if (!is_os_64) {
900 		DRM_WARN("Not 64-bit OS, feature not supported\n");
901 		goto def_value;
902 	}
903 	si_meminfo(&si);
904 	total_memory = (uint64_t)si.totalram * si.mem_unit;
905 
906 	if ((amdgpu_smu_memory_pool_size == 1) ||
907 		(amdgpu_smu_memory_pool_size == 2)) {
908 		if (total_memory < dram_size_three_GB)
909 			goto def_value1;
910 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
911 		(amdgpu_smu_memory_pool_size == 8)) {
912 		if (total_memory < dram_size_seven_GB)
913 			goto def_value1;
914 	} else {
915 		DRM_WARN("Smu memory pool size not supported\n");
916 		goto def_value;
917 	}
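	/* amdgpu_smu_memory_pool_size is specified in units of 256MB
	 * (1 << 28 bytes)
	 */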
918 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
919 
920 	return;
921 
922 def_value1:
	DRM_WARN("Not enough system memory\n");
924 def_value:
925 	adev->pm.smu_prv_buffer_size = 0;
926 }
927 
928 /**
929  * amdgpu_device_check_arguments - validate module params
930  *
931  * @adev: amdgpu_device pointer
932  *
933  * Validates certain module parameters and updates
934  * the associated values used by the driver (all asics).
935  */
936 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
937 {
938 	int ret = 0;
939 
940 	if (amdgpu_sched_jobs < 4) {
941 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
942 			 amdgpu_sched_jobs);
943 		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
945 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
946 			 amdgpu_sched_jobs);
947 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
948 	}
949 
950 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
951 		/* gart size must be greater or equal to 32M */
952 		dev_warn(adev->dev, "gart size (%d) too small\n",
953 			 amdgpu_gart_size);
954 		amdgpu_gart_size = -1;
955 	}
956 
957 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
958 		/* gtt size must be greater or equal to 32M */
959 		dev_warn(adev->dev, "gtt size (%d) too small\n",
960 				 amdgpu_gtt_size);
961 		amdgpu_gtt_size = -1;
962 	}
963 
964 	/* valid range is between 4 and 9 inclusive */
965 	if (amdgpu_vm_fragment_size != -1 &&
966 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
967 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
968 		amdgpu_vm_fragment_size = -1;
969 	}
970 
971 	amdgpu_device_check_smu_prv_buffer_size(adev);
972 
973 	amdgpu_device_check_vm_size(adev);
974 
975 	amdgpu_device_check_block_size(adev);
976 
977 	ret = amdgpu_device_get_job_timeout_settings(adev);
978 	if (ret) {
979 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
980 		return ret;
981 	}
982 
983 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
984 
985 	return ret;
986 }
987 
988 /**
989  * amdgpu_switcheroo_set_state - set switcheroo state
990  *
991  * @pdev: pci dev pointer
992  * @state: vga_switcheroo state
993  *
 * Callback for the switcheroo driver.  Suspends or resumes
 * the asics before or after they are powered up using ACPI methods.
996  */
997 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
998 {
999 	struct drm_device *dev = pci_get_drvdata(pdev);
1000 
1001 	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1002 		return;
1003 
1004 	if (state == VGA_SWITCHEROO_ON) {
1005 		pr_info("amdgpu: switched on\n");
1006 		/* don't suspend or resume card normally */
1007 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1008 
1009 		amdgpu_device_resume(dev, true, true);
1010 
1011 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1012 		drm_kms_helper_poll_enable(dev);
1013 	} else {
1014 		pr_info("amdgpu: switched off\n");
1015 		drm_kms_helper_poll_disable(dev);
1016 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1017 		amdgpu_device_suspend(dev, true, true);
1018 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1019 	}
1020 }
1021 
1022 /**
1023  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1024  *
1025  * @pdev: pci dev pointer
1026  *
 * Callback for the switcheroo driver.  Checks if the switcheroo
 * state can be changed.
1029  * Returns true if the state can be changed, false if not.
1030  */
1031 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1032 {
1033 	struct drm_device *dev = pci_get_drvdata(pdev);
1034 
1035 	/*
1036 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1037 	* locking inversion with the driver load path. And the access here is
1038 	* completely racy anyway. So don't bother with locking for now.
1039 	*/
1040 	return dev->open_count == 0;
1041 }
1042 
1043 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1044 	.set_gpu_state = amdgpu_switcheroo_set_state,
1045 	.reprobe = NULL,
1046 	.can_switch = amdgpu_switcheroo_can_switch,
1047 };
1048 
1049 /**
1050  * amdgpu_device_ip_set_clockgating_state - set the CG state
1051  *
1052  * @dev: amdgpu_device pointer
1053  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1054  * @state: clockgating state (gate or ungate)
1055  *
1056  * Sets the requested clockgating state for all instances of
1057  * the hardware IP specified.
1058  * Returns the error code from the last instance.
1059  */
1060 int amdgpu_device_ip_set_clockgating_state(void *dev,
1061 					   enum amd_ip_block_type block_type,
1062 					   enum amd_clockgating_state state)
1063 {
1064 	struct amdgpu_device *adev = dev;
1065 	int i, r = 0;
1066 
1067 	for (i = 0; i < adev->num_ip_blocks; i++) {
1068 		if (!adev->ip_blocks[i].status.valid)
1069 			continue;
1070 		if (adev->ip_blocks[i].version->type != block_type)
1071 			continue;
1072 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1073 			continue;
1074 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1075 			(void *)adev, state);
1076 		if (r)
1077 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1078 				  adev->ip_blocks[i].version->funcs->name, r);
1079 	}
1080 	return r;
1081 }
1082 
1083 /**
1084  * amdgpu_device_ip_set_powergating_state - set the PG state
1085  *
1086  * @dev: amdgpu_device pointer
1087  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1088  * @state: powergating state (gate or ungate)
1089  *
1090  * Sets the requested powergating state for all instances of
1091  * the hardware IP specified.
1092  * Returns the error code from the last instance.
1093  */
1094 int amdgpu_device_ip_set_powergating_state(void *dev,
1095 					   enum amd_ip_block_type block_type,
1096 					   enum amd_powergating_state state)
1097 {
1098 	struct amdgpu_device *adev = dev;
1099 	int i, r = 0;
1100 
1101 	for (i = 0; i < adev->num_ip_blocks; i++) {
1102 		if (!adev->ip_blocks[i].status.valid)
1103 			continue;
1104 		if (adev->ip_blocks[i].version->type != block_type)
1105 			continue;
1106 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1107 			continue;
1108 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1109 			(void *)adev, state);
1110 		if (r)
1111 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1112 				  adev->ip_blocks[i].version->funcs->name, r);
1113 	}
1114 	return r;
1115 }
1116 
1117 /**
1118  * amdgpu_device_ip_get_clockgating_state - get the CG state
1119  *
1120  * @adev: amdgpu_device pointer
1121  * @flags: clockgating feature flags
1122  *
1123  * Walks the list of IPs on the device and updates the clockgating
1124  * flags for each IP.
1125  * Updates @flags with the feature flags for each hardware IP where
1126  * clockgating is enabled.
1127  */
1128 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1129 					    u32 *flags)
1130 {
1131 	int i;
1132 
1133 	for (i = 0; i < adev->num_ip_blocks; i++) {
1134 		if (!adev->ip_blocks[i].status.valid)
1135 			continue;
1136 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1137 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1138 	}
1139 }
1140 
1141 /**
1142  * amdgpu_device_ip_wait_for_idle - wait for idle
1143  *
1144  * @adev: amdgpu_device pointer
1145  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1146  *
 * Waits for the requested hardware IP to be idle.
1148  * Returns 0 for success or a negative error code on failure.
1149  */
1150 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1151 				   enum amd_ip_block_type block_type)
1152 {
1153 	int i, r;
1154 
1155 	for (i = 0; i < adev->num_ip_blocks; i++) {
1156 		if (!adev->ip_blocks[i].status.valid)
1157 			continue;
1158 		if (adev->ip_blocks[i].version->type == block_type) {
1159 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1160 			if (r)
1161 				return r;
1162 			break;
1163 		}
1164 	}
1165 	return 0;
1166 
1167 }
1168 
1169 /**
1170  * amdgpu_device_ip_is_idle - is the hardware IP idle
1171  *
1172  * @adev: amdgpu_device pointer
1173  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1174  *
1175  * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1177  */
1178 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1179 			      enum amd_ip_block_type block_type)
1180 {
1181 	int i;
1182 
1183 	for (i = 0; i < adev->num_ip_blocks; i++) {
1184 		if (!adev->ip_blocks[i].status.valid)
1185 			continue;
1186 		if (adev->ip_blocks[i].version->type == block_type)
1187 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1188 	}
1189 	return true;
1190 
1191 }
1192 
1193 /**
1194  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1195  *
1196  * @adev: amdgpu_device pointer
1197  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1198  *
1199  * Returns a pointer to the hardware IP block structure
1200  * if it exists for the asic, otherwise NULL.
1201  */
1202 struct amdgpu_ip_block *
1203 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1204 			      enum amd_ip_block_type type)
1205 {
1206 	int i;
1207 
1208 	for (i = 0; i < adev->num_ip_blocks; i++)
1209 		if (adev->ip_blocks[i].version->type == type)
1210 			return &adev->ip_blocks[i];
1211 
1212 	return NULL;
1213 }
1214 
1215 /**
1216  * amdgpu_device_ip_block_version_cmp
1217  *
1218  * @adev: amdgpu_device pointer
1219  * @type: enum amd_ip_block_type
1220  * @major: major version
1221  * @minor: minor version
1222  *
1223  * return 0 if equal or greater
1224  * return 1 if smaller or the ip_block doesn't exist
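 *
 * For example, amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 * 8, 0) == 0 means the GFX IP block is present and is at least version 8.0.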
1225  */
1226 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1227 				       enum amd_ip_block_type type,
1228 				       u32 major, u32 minor)
1229 {
1230 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1231 
1232 	if (ip_block && ((ip_block->version->major > major) ||
1233 			((ip_block->version->major == major) &&
1234 			(ip_block->version->minor >= minor))))
1235 		return 0;
1236 
1237 	return 1;
1238 }
1239 
1240 /**
1241  * amdgpu_device_ip_block_add
1242  *
1243  * @adev: amdgpu_device pointer
1244  * @ip_block_version: pointer to the IP to add
1245  *
1246  * Adds the IP block driver information to the collection of IPs
1247  * on the asic.
1248  */
1249 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1250 			       const struct amdgpu_ip_block_version *ip_block_version)
1251 {
1252 	if (!ip_block_version)
1253 		return -EINVAL;
1254 
1255 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1256 		  ip_block_version->funcs->name);
1257 
1258 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1259 
1260 	return 0;
1261 }
1262 
1263 /**
1264  * amdgpu_device_enable_virtual_display - enable virtual display feature
1265  *
1266  * @adev: amdgpu_device pointer
1267  *
 * Enables the virtual display feature if the user has enabled it via
1269  * the module parameter virtual_display.  This feature provides a virtual
1270  * display hardware on headless boards or in virtualized environments.
1271  * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
1273  * virtual connectors, crtcs, etc.) specified.
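 * For example, a setting of the form "amdgpu.virtual_display=0000:04:00.0,2"
 * (a PCI address and optional CRTC count, multiple entries separated by ';',
 * or "all" to match every device) enables two virtual CRTCs on that device.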
1274  */
1275 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1276 {
1277 	adev->enable_virtual_display = false;
1278 
1279 	if (amdgpu_virtual_display) {
1280 		struct drm_device *ddev = adev->ddev;
1281 		const char *pci_address_name = pci_name(ddev->pdev);
1282 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1283 
1284 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1285 		pciaddstr_tmp = pciaddstr;
1286 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1287 			pciaddname = strsep(&pciaddname_tmp, ",");
1288 			if (!strcmp("all", pciaddname)
1289 			    || !strcmp(pci_address_name, pciaddname)) {
1290 				long num_crtc;
1291 				int res = -1;
1292 
1293 				adev->enable_virtual_display = true;
1294 
1295 				if (pciaddname_tmp)
1296 					res = kstrtol(pciaddname_tmp, 10,
1297 						      &num_crtc);
1298 
1299 				if (!res) {
1300 					if (num_crtc < 1)
1301 						num_crtc = 1;
1302 					if (num_crtc > 6)
1303 						num_crtc = 6;
1304 					adev->mode_info.num_crtc = num_crtc;
1305 				} else {
1306 					adev->mode_info.num_crtc = 1;
1307 				}
1308 				break;
1309 			}
1310 		}
1311 
1312 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1313 			 amdgpu_virtual_display, pci_address_name,
1314 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1315 
1316 		kfree(pciaddstr);
1317 	}
1318 }
1319 
1320 /**
1321  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1322  *
1323  * @adev: amdgpu_device pointer
1324  *
1325  * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1327  * the asic.
1328  * Returns 0 on success, -EINVAL on failure.
1329  */
1330 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1331 {
1332 	const char *chip_name;
1333 	char fw_name[30];
1334 	int err;
1335 	const struct gpu_info_firmware_header_v1_0 *hdr;
1336 
1337 	adev->firmware.gpu_info_fw = NULL;
1338 
1339 	switch (adev->asic_type) {
1340 	case CHIP_TOPAZ:
1341 	case CHIP_TONGA:
1342 	case CHIP_FIJI:
1343 	case CHIP_POLARIS10:
1344 	case CHIP_POLARIS11:
1345 	case CHIP_POLARIS12:
1346 	case CHIP_VEGAM:
1347 	case CHIP_CARRIZO:
1348 	case CHIP_STONEY:
1349 #ifdef CONFIG_DRM_AMDGPU_SI
1350 	case CHIP_VERDE:
1351 	case CHIP_TAHITI:
1352 	case CHIP_PITCAIRN:
1353 	case CHIP_OLAND:
1354 	case CHIP_HAINAN:
1355 #endif
1356 #ifdef CONFIG_DRM_AMDGPU_CIK
1357 	case CHIP_BONAIRE:
1358 	case CHIP_HAWAII:
1359 	case CHIP_KAVERI:
1360 	case CHIP_KABINI:
1361 	case CHIP_MULLINS:
1362 #endif
1363 	case CHIP_VEGA20:
1364 	default:
1365 		return 0;
1366 	case CHIP_VEGA10:
1367 		chip_name = "vega10";
1368 		break;
1369 	case CHIP_VEGA12:
1370 		chip_name = "vega12";
1371 		break;
1372 	case CHIP_RAVEN:
1373 		if (adev->rev_id >= 8)
1374 			chip_name = "raven2";
1375 		else if (adev->pdev->device == 0x15d8)
1376 			chip_name = "picasso";
1377 		else
1378 			chip_name = "raven";
1379 		break;
1380 	}
1381 
1382 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1383 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1384 	if (err) {
1385 		dev_err(adev->dev,
1386 			"Failed to load gpu_info firmware \"%s\"\n",
1387 			fw_name);
1388 		goto out;
1389 	}
1390 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1391 	if (err) {
1392 		dev_err(adev->dev,
1393 			"Failed to validate gpu_info firmware \"%s\"\n",
1394 			fw_name);
1395 		goto out;
1396 	}
1397 
1398 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1399 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1400 
1401 	switch (hdr->version_major) {
1402 	case 1:
1403 	{
1404 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1405 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1406 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1407 
1408 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1409 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1410 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1411 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1412 		adev->gfx.config.max_texture_channel_caches =
1413 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1414 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1415 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1416 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1417 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1418 		adev->gfx.config.double_offchip_lds_buf =
1419 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1420 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1421 		adev->gfx.cu_info.max_waves_per_simd =
1422 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1423 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1424 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1425 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1426 		break;
1427 	}
1428 	default:
1429 		dev_err(adev->dev,
1430 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1431 		err = -EINVAL;
1432 		goto out;
1433 	}
1434 out:
1435 	return err;
1436 }
1437 
1438 /**
1439  * amdgpu_device_ip_early_init - run early init for hardware IPs
1440  *
1441  * @adev: amdgpu_device pointer
1442  *
1443  * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
1445  * is the first stage in initializing the asic.
1446  * Returns 0 on success, negative error code on failure.
1447  */
1448 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1449 {
1450 	int i, r;
1451 
1452 	amdgpu_device_enable_virtual_display(adev);
1453 
1454 	switch (adev->asic_type) {
1455 	case CHIP_TOPAZ:
1456 	case CHIP_TONGA:
1457 	case CHIP_FIJI:
1458 	case CHIP_POLARIS10:
1459 	case CHIP_POLARIS11:
1460 	case CHIP_POLARIS12:
1461 	case CHIP_VEGAM:
1462 	case CHIP_CARRIZO:
1463 	case CHIP_STONEY:
1464 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1465 			adev->family = AMDGPU_FAMILY_CZ;
1466 		else
1467 			adev->family = AMDGPU_FAMILY_VI;
1468 
1469 		r = vi_set_ip_blocks(adev);
1470 		if (r)
1471 			return r;
1472 		break;
1473 #ifdef CONFIG_DRM_AMDGPU_SI
1474 	case CHIP_VERDE:
1475 	case CHIP_TAHITI:
1476 	case CHIP_PITCAIRN:
1477 	case CHIP_OLAND:
1478 	case CHIP_HAINAN:
1479 		adev->family = AMDGPU_FAMILY_SI;
1480 		r = si_set_ip_blocks(adev);
1481 		if (r)
1482 			return r;
1483 		break;
1484 #endif
1485 #ifdef CONFIG_DRM_AMDGPU_CIK
1486 	case CHIP_BONAIRE:
1487 	case CHIP_HAWAII:
1488 	case CHIP_KAVERI:
1489 	case CHIP_KABINI:
1490 	case CHIP_MULLINS:
1491 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1492 			adev->family = AMDGPU_FAMILY_CI;
1493 		else
1494 			adev->family = AMDGPU_FAMILY_KV;
1495 
1496 		r = cik_set_ip_blocks(adev);
1497 		if (r)
1498 			return r;
1499 		break;
1500 #endif
1501 	case CHIP_VEGA10:
1502 	case CHIP_VEGA12:
1503 	case CHIP_VEGA20:
1504 	case CHIP_RAVEN:
1505 		if (adev->asic_type == CHIP_RAVEN)
1506 			adev->family = AMDGPU_FAMILY_RV;
1507 		else
1508 			adev->family = AMDGPU_FAMILY_AI;
1509 
1510 		r = soc15_set_ip_blocks(adev);
1511 		if (r)
1512 			return r;
1513 		break;
1514 	default:
1515 		/* FIXME: not supported yet */
1516 		return -EINVAL;
1517 	}
1518 
1519 	r = amdgpu_device_parse_gpu_info_fw(adev);
1520 	if (r)
1521 		return r;
1522 
1523 	amdgpu_amdkfd_device_probe(adev);
1524 
1525 	if (amdgpu_sriov_vf(adev)) {
1526 		r = amdgpu_virt_request_full_gpu(adev, true);
1527 		if (r)
1528 			return -EAGAIN;
1529 
1530 		/* query the reg access mode at the very beginning */
1531 		amdgpu_virt_init_reg_access_mode(adev);
1532 	}
1533 
1534 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1535 	if (amdgpu_sriov_vf(adev))
1536 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1537 
1538 	for (i = 0; i < adev->num_ip_blocks; i++) {
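		/* amdgpu_ip_block_mask is a debug module parameter: a cleared
		 * bit i leaves IP block i disabled.
		 */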
1539 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1540 			DRM_ERROR("disabled ip block: %d <%s>\n",
1541 				  i, adev->ip_blocks[i].version->funcs->name);
1542 			adev->ip_blocks[i].status.valid = false;
1543 		} else {
1544 			if (adev->ip_blocks[i].version->funcs->early_init) {
1545 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1546 				if (r == -ENOENT) {
1547 					adev->ip_blocks[i].status.valid = false;
1548 				} else if (r) {
1549 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1550 						  adev->ip_blocks[i].version->funcs->name, r);
1551 					return r;
1552 				} else {
1553 					adev->ip_blocks[i].status.valid = true;
1554 				}
1555 			} else {
1556 				adev->ip_blocks[i].status.valid = true;
1557 			}
1558 		}
1559 		/* get the vbios after the asic_funcs are set up */
1560 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1561 			/* Read BIOS */
1562 			if (!amdgpu_get_bios(adev))
1563 				return -EINVAL;
1564 
1565 			r = amdgpu_atombios_init(adev);
1566 			if (r) {
1567 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1568 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1569 				return r;
1570 			}
1571 		}
1572 	}
1573 
1574 	adev->cg_flags &= amdgpu_cg_mask;
1575 	adev->pg_flags &= amdgpu_pg_mask;
1576 
1577 	return 0;
1578 }
1579 
1580 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1581 {
1582 	int i, r;
1583 
1584 	for (i = 0; i < adev->num_ip_blocks; i++) {
1585 		if (!adev->ip_blocks[i].status.sw)
1586 			continue;
1587 		if (adev->ip_blocks[i].status.hw)
1588 			continue;
1589 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1590 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1591 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1592 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1593 			if (r) {
1594 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1595 					  adev->ip_blocks[i].version->funcs->name, r);
1596 				return r;
1597 			}
1598 			adev->ip_blocks[i].status.hw = true;
1599 		}
1600 	}
1601 
1602 	return 0;
1603 }
1604 
1605 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1606 {
1607 	int i, r;
1608 
1609 	for (i = 0; i < adev->num_ip_blocks; i++) {
1610 		if (!adev->ip_blocks[i].status.sw)
1611 			continue;
1612 		if (adev->ip_blocks[i].status.hw)
1613 			continue;
1614 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1615 		if (r) {
1616 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1617 				  adev->ip_blocks[i].version->funcs->name, r);
1618 			return r;
1619 		}
1620 		adev->ip_blocks[i].status.hw = true;
1621 	}
1622 
1623 	return 0;
1624 }
1625 
1626 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1627 {
1628 	int r = 0;
1629 	int i;
1630 	uint32_t smu_version;
1631 
1632 	if (adev->asic_type >= CHIP_VEGA10) {
1633 		for (i = 0; i < adev->num_ip_blocks; i++) {
1634 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1635 				if (adev->in_gpu_reset || adev->in_suspend) {
1636 					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1637 						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1638 					r = adev->ip_blocks[i].version->funcs->resume(adev);
1639 					if (r) {
1640 						DRM_ERROR("resume of IP block <%s> failed %d\n",
1641 							  adev->ip_blocks[i].version->funcs->name, r);
1642 						return r;
1643 					}
1644 				} else {
1645 					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1646 					if (r) {
1647 						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1648 						  adev->ip_blocks[i].version->funcs->name, r);
1649 						return r;
1650 					}
1651 				}
1652 				adev->ip_blocks[i].status.hw = true;
1653 			}
1654 		}
1655 	}
1656 	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1657 
1658 	return r;
1659 }
1660 
1661 /**
1662  * amdgpu_device_ip_init - run init for hardware IPs
1663  *
1664  * @adev: amdgpu_device pointer
1665  *
1666  * Main initialization pass for hardware IPs.  The list of all the hardware
1667  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1668  * are run.  sw_init initializes the software state associated with each IP
1669  * and hw_init initializes the hardware associated with each IP.
1670  * Returns 0 on success, negative error code on failure.
1671  */
1672 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1673 {
1674 	int i, r;
1675 
1676 	r = amdgpu_ras_init(adev);
1677 	if (r)
1678 		return r;
1679 
1680 	for (i = 0; i < adev->num_ip_blocks; i++) {
1681 		if (!adev->ip_blocks[i].status.valid)
1682 			continue;
1683 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1684 		if (r) {
1685 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1686 				  adev->ip_blocks[i].version->funcs->name, r);
1687 			goto init_failed;
1688 		}
1689 		adev->ip_blocks[i].status.sw = true;
1690 
1691 		/* need to do gmc hw init early so we can allocate gpu mem */
1692 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1693 			r = amdgpu_device_vram_scratch_init(adev);
1694 			if (r) {
1695 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1696 				goto init_failed;
1697 			}
1698 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1699 			if (r) {
1700 				DRM_ERROR("hw_init %d failed %d\n", i, r);
1701 				goto init_failed;
1702 			}
1703 			r = amdgpu_device_wb_init(adev);
1704 			if (r) {
1705 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1706 				goto init_failed;
1707 			}
1708 			adev->ip_blocks[i].status.hw = true;
1709 
1710 			/* right after GMC hw init, we create CSA */
1711 			if (amdgpu_sriov_vf(adev)) {
1712 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1713 								AMDGPU_GEM_DOMAIN_VRAM,
1714 								AMDGPU_CSA_SIZE);
1715 				if (r) {
1716 					DRM_ERROR("allocate CSA failed %d\n", r);
1717 					goto init_failed;
1718 				}
1719 			}
1720 		}
1721 	}
1722 
1723 	r = amdgpu_ib_pool_init(adev);
1724 	if (r) {
1725 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1726 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1727 		goto init_failed;
1728 	}
1729 
1730 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1731 	if (r)
1732 		goto init_failed;
1733 
1734 	r = amdgpu_device_ip_hw_init_phase1(adev);
1735 	if (r)
1736 		goto init_failed;
1737 
1738 	r = amdgpu_device_fw_loading(adev);
1739 	if (r)
1740 		goto init_failed;
1741 
1742 	r = amdgpu_device_ip_hw_init_phase2(adev);
1743 	if (r)
1744 		goto init_failed;
1745 
1746 	if (adev->gmc.xgmi.num_physical_nodes > 1)
1747 		amdgpu_xgmi_add_device(adev);
1748 	amdgpu_amdkfd_device_init(adev);
1749 
1750 init_failed:
1751 	if (amdgpu_sriov_vf(adev)) {
1752 		if (!r)
1753 			amdgpu_virt_init_data_exchange(adev);
1754 		amdgpu_virt_release_full_gpu(adev, true);
1755 	}
1756 
1757 	return r;
1758 }
1759 
1760 /**
 * amdgpu_device_fill_reset_magic - save the reset magic from the GART pointer
1762  *
1763  * @adev: amdgpu_device pointer
1764  *
 * Saves a copy of the data at the GART pointer in VRAM as the reset magic.
 * The driver calls this function before a GPU reset.  If the value is
 * retained after a GPU reset, VRAM has not been lost.  Some GPU resets may
 * destroy VRAM contents.
1768  */
1769 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1770 {
1771 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1772 }
1773 
1774 /**
1775  * amdgpu_device_check_vram_lost - check if vram is valid
1776  *
1777  * @adev: amdgpu_device pointer
1778  *
 * Checks the data at the GART pointer in VRAM against the saved reset magic.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM have been lost or not.
1782  * returns true if vram is lost, false if not.
1783  */
1784 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1785 {
1786 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
1787 			AMDGPU_RESET_MAGIC_NUM);
1788 }
1789 
1790 /**
1791  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
1792  *
1793  * @adev: amdgpu_device pointer
1794  *
1795  * The list of all the hardware IPs that make up the asic is walked and the
1796  * set_clockgating_state callbacks are run.
 * During the late init pass, clockgating is enabled for the hardware IPs;
 * during the fini or suspend pass, clockgating is disabled.
1799  * Returns 0 on success, negative error code on failure.
1800  */
1801 
1802 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1803 						enum amd_clockgating_state state)
1804 {
1805 	int i, j, r;
1806 
1807 	if (amdgpu_emu_mode == 1)
1808 		return 0;
1809 
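	/* gating walks the IP list front to back, ungating walks it in
	 * reverse order
	 */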
1810 	for (j = 0; j < adev->num_ip_blocks; j++) {
1811 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1812 		if (!adev->ip_blocks[i].status.late_initialized)
1813 			continue;
1814 		/* skip CG for VCE/UVD, it's handled specially */
1815 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1816 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1817 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1818 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1819 			/* enable clockgating to save power */
1820 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1821 										     state);
1822 			if (r) {
1823 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
1824 					  adev->ip_blocks[i].version->funcs->name, r);
1825 				return r;
1826 			}
1827 		}
1828 	}
1829 
1830 	return 0;
1831 }
1832 
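/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_powergating_state callbacks are run.  The late initialization pass
 * enables powergating for hardware IPs; the fini or suspend pass disables it.
 * UVD, VCE and VCN are skipped since their powergating is handled specially.
 * Returns 0 on success, negative error code on failure.
 */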
1833 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
1834 {
1835 	int i, j, r;
1836 
1837 	if (amdgpu_emu_mode == 1)
1838 		return 0;
1839 
1840 	for (j = 0; j < adev->num_ip_blocks; j++) {
1841 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1842 		if (!adev->ip_blocks[i].status.late_initialized)
1843 			continue;
		/* skip PG for VCE/UVD, it's handled specially */
1845 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1846 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1847 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1848 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
1849 			/* enable powergating to save power */
1850 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1851 											state);
1852 			if (r) {
1853 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1854 					  adev->ip_blocks[i].version->funcs->name, r);
1855 				return r;
1856 			}
1857 		}
1858 	}
1859 	return 0;
1860 }
1861 
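/**
 * amdgpu_device_enable_mgpu_fan_boost - enable the fan boost feature on multi-GPU systems
 *
 * Walks the list of registered dGPUs and enables the MGPU fan boost feature
 * on each of them where it is supported and not already enabled.  The feature
 * is only enabled when two or more dGPUs are present in the system.
 * Returns 0 on success, negative error code on failure.
 */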
1862 static int amdgpu_device_enable_mgpu_fan_boost(void)
1863 {
1864 	struct amdgpu_gpu_instance *gpu_ins;
1865 	struct amdgpu_device *adev;
1866 	int i, ret = 0;
1867 
1868 	mutex_lock(&mgpu_info.mutex);
1869 
1870 	/*
1871 	 * MGPU fan boost feature should be enabled
1872 	 * only when there are two or more dGPUs in
1873 	 * the system
1874 	 */
1875 	if (mgpu_info.num_dgpu < 2)
1876 		goto out;
1877 
1878 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
1879 		gpu_ins = &(mgpu_info.gpu_ins[i]);
1880 		adev = gpu_ins->adev;
1881 		if (!(adev->flags & AMD_IS_APU) &&
1882 		    !gpu_ins->mgpu_fan_enabled &&
1883 		    adev->powerplay.pp_funcs &&
1884 		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1885 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1886 			if (ret)
1887 				break;
1888 
1889 			gpu_ins->mgpu_fan_enabled = 1;
1890 		}
1891 	}
1892 
1893 out:
1894 	mutex_unlock(&mgpu_info.mutex);
1895 
1896 	return ret;
1897 }
1898 
1899 /**
1900  * amdgpu_device_ip_late_init - run late init for hardware IPs
1901  *
1902  * @adev: amdgpu_device pointer
1903  *
1904  * Late initialization pass for hardware IPs.  The list of all the hardware
1905  * IPs that make up the asic is walked and the late_init callbacks are run.
1906  * late_init covers any special initialization that an IP requires
 * after all of the other IPs have been initialized or something that needs to happen
1908  * late in the init process.
1909  * Returns 0 on success, negative error code on failure.
1910  */
1911 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
1912 {
1913 	int i = 0, r;
1914 
1915 	for (i = 0; i < adev->num_ip_blocks; i++) {
1916 		if (!adev->ip_blocks[i].status.hw)
1917 			continue;
1918 		if (adev->ip_blocks[i].version->funcs->late_init) {
1919 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1920 			if (r) {
1921 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
1922 					  adev->ip_blocks[i].version->funcs->name, r);
1923 				return r;
1924 			}
1925 		}
1926 		adev->ip_blocks[i].status.late_initialized = true;
1927 	}
1928 
1929 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1930 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
1931 
1932 	amdgpu_device_fill_reset_magic(adev);
1933 
1934 	r = amdgpu_device_enable_mgpu_fan_boost();
1935 	if (r)
1936 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
1937 
1938 	/* set to low pstate by default */
1939 	amdgpu_xgmi_set_pstate(adev, 0);
1940 
1941 	return 0;
1942 }
1943 
1944 /**
1945  * amdgpu_device_ip_fini - run fini for hardware IPs
1946  *
1947  * @adev: amdgpu_device pointer
1948  *
1949  * Main teardown pass for hardware IPs.  The list of all the hardware
1950  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1951  * are run.  hw_fini tears down the hardware associated with each IP
1952  * and sw_fini tears down any software state associated with each IP.
1953  * Returns 0 on success, negative error code on failure.
1954  */
1955 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1956 {
1957 	int i, r;
1958 
1959 	amdgpu_ras_pre_fini(adev);
1960 
1961 	if (adev->gmc.xgmi.num_physical_nodes > 1)
1962 		amdgpu_xgmi_remove_device(adev);
1963 
1964 	amdgpu_amdkfd_device_fini(adev);
1965 
1966 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
1967 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1968 
1969 	/* need to disable SMC first */
1970 	for (i = 0; i < adev->num_ip_blocks; i++) {
1971 		if (!adev->ip_blocks[i].status.hw)
1972 			continue;
1973 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
1974 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
1975 			/* XXX handle errors */
1976 			if (r) {
1977 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1978 					  adev->ip_blocks[i].version->funcs->name, r);
1979 			}
1980 			adev->ip_blocks[i].status.hw = false;
1981 			break;
1982 		}
1983 	}
1984 
1985 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1986 		if (!adev->ip_blocks[i].status.hw)
1987 			continue;
1988 
1989 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
1990 		/* XXX handle errors */
1991 		if (r) {
1992 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1993 				  adev->ip_blocks[i].version->funcs->name, r);
1994 		}
1995 
1996 		adev->ip_blocks[i].status.hw = false;
1997 	}
1998 
1999 
2000 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2001 		if (!adev->ip_blocks[i].status.sw)
2002 			continue;
2003 
2004 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2005 			amdgpu_ucode_free_bo(adev);
2006 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2007 			amdgpu_device_wb_fini(adev);
2008 			amdgpu_device_vram_scratch_fini(adev);
2009 			amdgpu_ib_pool_fini(adev);
2010 		}
2011 
2012 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2013 		/* XXX handle errors */
2014 		if (r) {
2015 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2016 				  adev->ip_blocks[i].version->funcs->name, r);
2017 		}
2018 		adev->ip_blocks[i].status.sw = false;
2019 		adev->ip_blocks[i].status.valid = false;
2020 	}
2021 
2022 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2023 		if (!adev->ip_blocks[i].status.late_initialized)
2024 			continue;
2025 		if (adev->ip_blocks[i].version->funcs->late_fini)
2026 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2027 		adev->ip_blocks[i].status.late_initialized = false;
2028 	}
2029 
2030 	amdgpu_ras_fini(adev);
2031 
2032 	if (amdgpu_sriov_vf(adev))
2033 		if (amdgpu_virt_release_full_gpu(adev, false))
2034 			DRM_ERROR("failed to release exclusive mode on fini\n");
2035 
2036 	return 0;
2037 }
2038 
2039 /**
2040  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2041  *
2042  * @work: work_struct.
2043  */
2044 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2045 {
2046 	struct amdgpu_device *adev =
2047 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2048 	int r;
2049 
2050 	r = amdgpu_ib_ring_tests(adev);
2051 	if (r)
2052 		DRM_ERROR("ib ring test failed (%d).\n", r);
2053 }
2054 
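/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler for enabling GFXOFF
 *
 * @work: work_struct.
 *
 * Requests the SMU to power gate the GFX block if no GFXOFF disable requests
 * are outstanding and GFXOFF is not already enabled.
 */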
2055 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2056 {
2057 	struct amdgpu_device *adev =
2058 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2059 
2060 	mutex_lock(&adev->gfx.gfx_off_mutex);
2061 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2062 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2063 			adev->gfx.gfx_off_state = true;
2064 	}
2065 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2066 }
2067 
2068 /**
2069  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2070  *
2071  * @adev: amdgpu_device pointer
2072  *
 * First suspend pass for hardware IPs.  Clockgating and powergating are
 * disabled, then the list of all the hardware IPs that make up the asic is
 * walked and the suspend callbacks are run for the display (DCE) blocks.
 * suspend puts the hardware and software state in each IP into a state
 * suitable for suspend.
2077  * Returns 0 on success, negative error code on failure.
2078  */
2079 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2080 {
2081 	int i, r;
2082 
2083 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2084 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2085 
2086 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2087 		if (!adev->ip_blocks[i].status.valid)
2088 			continue;
2089 		/* displays are handled separately */
2090 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
			r = adev->ip_blocks[i].version->funcs->suspend(adev);
			/* XXX handle errors */
2094 			if (r) {
2095 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2096 					  adev->ip_blocks[i].version->funcs->name, r);
2097 			}
2098 		}
2099 	}
2100 
2101 	return 0;
2102 }
2103 
2104 /**
2105  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2106  *
2107  * @adev: amdgpu_device pointer
2108  *
 * Second suspend pass for hardware IPs.  The list of all the hardware IPs
 * that make up the asic is walked and the suspend callbacks are run for all
 * blocks except the display (DCE) blocks, which are handled in phase 1.
 * suspend puts the hardware and software state in each IP into a state
 * suitable for suspend.
2113  * Returns 0 on success, negative error code on failure.
2114  */
2115 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2116 {
2117 	int i, r;
2118 
2119 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2120 		if (!adev->ip_blocks[i].status.valid)
2121 			continue;
2122 		/* displays are handled in phase1 */
2123 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2124 			continue;
		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors */
2128 		if (r) {
2129 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2130 				  adev->ip_blocks[i].version->funcs->name, r);
2131 		}
2132 	}
2133 
2134 	return 0;
2135 }
2136 
2137 /**
2138  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2139  *
2140  * @adev: amdgpu_device pointer
2141  *
2142  * Main suspend function for hardware IPs.  The list of all the hardware
2143  * IPs that make up the asic is walked, clockgating is disabled and the
2144  * suspend callbacks are run.  suspend puts the hardware and software state
2145  * in each IP into a state suitable for suspend.
2146  * Returns 0 on success, negative error code on failure.
2147  */
2148 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2149 {
2150 	int r;
2151 
2152 	if (amdgpu_sriov_vf(adev))
2153 		amdgpu_virt_request_full_gpu(adev, false);
2154 
2155 	r = amdgpu_device_ip_suspend_phase1(adev);
2156 	if (r)
2157 		return r;
2158 	r = amdgpu_device_ip_suspend_phase2(adev);
2159 
2160 	if (amdgpu_sriov_vf(adev))
2161 		amdgpu_virt_release_full_gpu(adev, false);
2162 
2163 	return r;
2164 }
2165 
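/**
 * amdgpu_device_ip_reinit_early_sriov - reinit early hardware IPs after a VF FLR
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs the hw_init callbacks for the GMC, COMMON, PSP and IH blocks in
 * that order.  Used to bring the basic hardware IPs back up before the
 * firmware is reloaded during an SR-IOV reset.
 * Returns 0 on success, negative error code on failure.
 */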
2166 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2167 {
2168 	int i, r;
2169 
2170 	static enum amd_ip_block_type ip_order[] = {
2171 		AMD_IP_BLOCK_TYPE_GMC,
2172 		AMD_IP_BLOCK_TYPE_COMMON,
2173 		AMD_IP_BLOCK_TYPE_PSP,
2174 		AMD_IP_BLOCK_TYPE_IH,
2175 	};
2176 
2177 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2178 		int j;
2179 		struct amdgpu_ip_block *block;
2180 
2181 		for (j = 0; j < adev->num_ip_blocks; j++) {
2182 			block = &adev->ip_blocks[j];
2183 
2184 			if (block->version->type != ip_order[i] ||
2185 				!block->status.valid)
2186 				continue;
2187 
2188 			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2190 			if (r)
2191 				return r;
2192 		}
2193 	}
2194 
2195 	return 0;
2196 }
2197 
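/**
 * amdgpu_device_ip_reinit_late_sriov - reinit late hardware IPs after a VF FLR
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs the hw_init callbacks for the SMC, DCE, GFX, SDMA, UVD and VCE
 * blocks in that order.  Used to bring the remaining hardware IPs back up
 * after the firmware has been reloaded during an SR-IOV reset.
 * Returns 0 on success, negative error code on failure.
 */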
2198 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2199 {
2200 	int i, r;
2201 
2202 	static enum amd_ip_block_type ip_order[] = {
2203 		AMD_IP_BLOCK_TYPE_SMC,
2204 		AMD_IP_BLOCK_TYPE_DCE,
2205 		AMD_IP_BLOCK_TYPE_GFX,
2206 		AMD_IP_BLOCK_TYPE_SDMA,
2207 		AMD_IP_BLOCK_TYPE_UVD,
2208 		AMD_IP_BLOCK_TYPE_VCE
2209 	};
2210 
2211 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2212 		int j;
2213 		struct amdgpu_ip_block *block;
2214 
2215 		for (j = 0; j < adev->num_ip_blocks; j++) {
2216 			block = &adev->ip_blocks[j];
2217 
2218 			if (block->version->type != ip_order[i] ||
2219 				!block->status.valid)
2220 				continue;
2221 
2222 			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2224 			if (r)
2225 				return r;
2226 		}
2227 	}
2228 
2229 	return 0;
2230 }
2231 
2232 /**
2233  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2234  *
2235  * @adev: amdgpu_device pointer
2236  *
2237  * First resume function for hardware IPs.  The list of all the hardware
2238  * IPs that make up the asic is walked and the resume callbacks are run for
2239  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2240  * after a suspend and updates the software state as necessary.  This
2241  * function is also used for restoring the GPU after a GPU reset.
2242  * Returns 0 on success, negative error code on failure.
2243  */
2244 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2245 {
2246 	int i, r;
2247 
2248 	for (i = 0; i < adev->num_ip_blocks; i++) {
2249 		if (!adev->ip_blocks[i].status.valid)
2250 			continue;
2251 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2252 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2253 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2254 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2255 			if (r) {
2256 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2257 					  adev->ip_blocks[i].version->funcs->name, r);
2258 				return r;
2259 			}
2260 		}
2261 	}
2262 
2263 	return 0;
2264 }
2265 
2266 /**
2267  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2268  *
2269  * @adev: amdgpu_device pointer
2270  *
 * Second resume function for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
2274  * functional state after a suspend and updates the software state as
2275  * necessary.  This function is also used for restoring the GPU after a GPU
2276  * reset.
2277  * Returns 0 on success, negative error code on failure.
2278  */
2279 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2280 {
2281 	int i, r;
2282 
2283 	for (i = 0; i < adev->num_ip_blocks; i++) {
2284 		if (!adev->ip_blocks[i].status.valid)
2285 			continue;
2286 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2287 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2288 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2289 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2290 			continue;
2291 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2292 		if (r) {
2293 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2294 				  adev->ip_blocks[i].version->funcs->name, r);
2295 			return r;
2296 		}
2297 	}
2298 
2299 	return 0;
2300 }
2301 
2302 /**
2303  * amdgpu_device_ip_resume - run resume for hardware IPs
2304  *
2305  * @adev: amdgpu_device pointer
2306  *
2307  * Main resume function for hardware IPs.  The hardware IPs
2308  * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them.  In this case (S3/S4) they are
2311  * run sequentially.
2312  * Returns 0 on success, negative error code on failure.
2313  */
2314 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2315 {
2316 	int r;
2317 
2318 	r = amdgpu_device_ip_resume_phase1(adev);
2319 	if (r)
2320 		return r;
2321 
2322 	r = amdgpu_device_fw_loading(adev);
2323 	if (r)
2324 		return r;
2325 
2326 	r = amdgpu_device_ip_resume_phase2(adev);
2327 
2328 	return r;
2329 }
2330 
2331 /**
2332  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2333  *
2334  * @adev: amdgpu_device pointer
2335  *
2336  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2337  */
2338 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2339 {
2340 	if (amdgpu_sriov_vf(adev)) {
2341 		if (adev->is_atom_fw) {
2342 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2343 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2344 		} else {
2345 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2346 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2347 		}
2348 
2349 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2350 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2351 	}
2352 }
2353 
2354 /**
2355  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2356  *
2357  * @asic_type: AMD asic type
2358  *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * Returns true if DC has support, false if not.
2361  */
2362 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2363 {
2364 	switch (asic_type) {
2365 #if defined(CONFIG_DRM_AMD_DC)
2366 	case CHIP_BONAIRE:
2367 	case CHIP_KAVERI:
2368 	case CHIP_KABINI:
2369 	case CHIP_MULLINS:
2370 		/*
2371 		 * We have systems in the wild with these ASICs that require
2372 		 * LVDS and VGA support which is not supported with DC.
2373 		 *
2374 		 * Fallback to the non-DC driver here by default so as not to
2375 		 * cause regressions.
2376 		 */
2377 		return amdgpu_dc > 0;
2378 	case CHIP_HAWAII:
2379 	case CHIP_CARRIZO:
2380 	case CHIP_STONEY:
2381 	case CHIP_POLARIS10:
2382 	case CHIP_POLARIS11:
2383 	case CHIP_POLARIS12:
2384 	case CHIP_VEGAM:
2385 	case CHIP_TONGA:
2386 	case CHIP_FIJI:
2387 	case CHIP_VEGA10:
2388 	case CHIP_VEGA12:
2389 	case CHIP_VEGA20:
2390 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2391 	case CHIP_RAVEN:
2392 #endif
2393 		return amdgpu_dc != 0;
2394 #endif
2395 	default:
2396 		return false;
2397 	}
2398 }
2399 
2400 /**
2401  * amdgpu_device_has_dc_support - check if dc is supported
2402  *
 * @adev: amdgpu_device pointer
2404  *
2405  * Returns true for supported, false for not supported
2406  */
2407 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2408 {
2409 	if (amdgpu_sriov_vf(adev))
2410 		return false;
2411 
2412 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2413 }
2414 
2415 
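/**
 * amdgpu_device_xgmi_reset_func - work handler for resetting one XGMI hive node
 *
 * @__work: work_struct.
 *
 * Resets the ASIC and stores the result in adev->asic_reset_res so that the
 * caller can collect the per-node results after flushing the work items.
 */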
2416 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2417 {
2418 	struct amdgpu_device *adev =
2419 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2420 
2421 	adev->asic_reset_res =  amdgpu_asic_reset(adev);
2422 	if (adev->asic_reset_res)
2423 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2424 			 adev->asic_reset_res, adev->ddev->unique);
2425 }
2426 
2427 
2428 /**
2429  * amdgpu_device_init - initialize the driver
2430  *
2431  * @adev: amdgpu_device pointer
2432  * @ddev: drm dev pointer
2433  * @pdev: pci dev pointer
2434  * @flags: driver flags
2435  *
2436  * Initializes the driver info and hw (all asics).
2437  * Returns 0 for success or an error on failure.
2438  * Called at driver startup.
2439  */
2440 int amdgpu_device_init(struct amdgpu_device *adev,
2441 		       struct drm_device *ddev,
2442 		       struct pci_dev *pdev,
2443 		       uint32_t flags)
2444 {
2445 	int r, i;
2446 	bool runtime = false;
2447 	u32 max_MBps;
2448 
2449 	adev->shutdown = false;
2450 	adev->dev = &pdev->dev;
2451 	adev->ddev = ddev;
2452 	adev->pdev = pdev;
2453 	adev->flags = flags;
2454 	adev->asic_type = flags & AMD_ASIC_MASK;
2455 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2456 	if (amdgpu_emu_mode == 1)
2457 		adev->usec_timeout *= 2;
2458 	adev->gmc.gart_size = 512 * 1024 * 1024;
2459 	adev->accel_working = false;
2460 	adev->num_rings = 0;
2461 	adev->mman.buffer_funcs = NULL;
2462 	adev->mman.buffer_funcs_ring = NULL;
2463 	adev->vm_manager.vm_pte_funcs = NULL;
2464 	adev->vm_manager.vm_pte_num_rqs = 0;
2465 	adev->gmc.gmc_funcs = NULL;
2466 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2467 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2468 
2469 	adev->smc_rreg = &amdgpu_invalid_rreg;
2470 	adev->smc_wreg = &amdgpu_invalid_wreg;
2471 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2472 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2473 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2474 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2475 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2476 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2477 	adev->didt_rreg = &amdgpu_invalid_rreg;
2478 	adev->didt_wreg = &amdgpu_invalid_wreg;
2479 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2480 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2481 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2482 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2483 
2484 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2485 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2486 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2487 
	/* mutex initializations are all done here so we
	 * can recall the functions without having locking issues */
2490 	atomic_set(&adev->irq.ih.lock, 0);
2491 	mutex_init(&adev->firmware.mutex);
2492 	mutex_init(&adev->pm.mutex);
2493 	mutex_init(&adev->gfx.gpu_clock_mutex);
2494 	mutex_init(&adev->srbm_mutex);
2495 	mutex_init(&adev->gfx.pipe_reserve_mutex);
2496 	mutex_init(&adev->gfx.gfx_off_mutex);
2497 	mutex_init(&adev->grbm_idx_mutex);
2498 	mutex_init(&adev->mn_lock);
2499 	mutex_init(&adev->virt.vf_errors.lock);
2500 	hash_init(adev->mn_hash);
2501 	mutex_init(&adev->lock_reset);
2502 	mutex_init(&adev->virt.dpm_mutex);
2503 
2504 	r = amdgpu_device_check_arguments(adev);
2505 	if (r)
2506 		return r;
2507 
2508 	spin_lock_init(&adev->mmio_idx_lock);
2509 	spin_lock_init(&adev->smc_idx_lock);
2510 	spin_lock_init(&adev->pcie_idx_lock);
2511 	spin_lock_init(&adev->uvd_ctx_idx_lock);
2512 	spin_lock_init(&adev->didt_idx_lock);
2513 	spin_lock_init(&adev->gc_cac_idx_lock);
2514 	spin_lock_init(&adev->se_cac_idx_lock);
2515 	spin_lock_init(&adev->audio_endpt_idx_lock);
2516 	spin_lock_init(&adev->mm_stats.lock);
2517 
2518 	INIT_LIST_HEAD(&adev->shadow_list);
2519 	mutex_init(&adev->shadow_list_lock);
2520 
2521 	INIT_LIST_HEAD(&adev->ring_lru_list);
2522 	spin_lock_init(&adev->ring_lru_list_lock);
2523 
2524 	INIT_DELAYED_WORK(&adev->delayed_init_work,
2525 			  amdgpu_device_delayed_init_work_handler);
2526 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2527 			  amdgpu_device_delay_enable_gfx_off);
2528 
2529 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2530 
2531 	adev->gfx.gfx_off_req_count = 1;
2532 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2533 
2534 	/* Registers mapping */
2535 	/* TODO: block userspace mapping of io register */
2536 	if (adev->asic_type >= CHIP_BONAIRE) {
2537 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2538 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2539 	} else {
2540 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2541 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2542 	}
2543 
2544 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2545 	if (adev->rmmio == NULL) {
2546 		return -ENOMEM;
2547 	}
2548 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2549 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2550 
2551 	/* io port mapping */
2552 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2553 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2554 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2555 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2556 			break;
2557 		}
2558 	}
2559 	if (adev->rio_mem == NULL)
2560 		DRM_INFO("PCI I/O BAR is not found.\n");
2561 
2562 	amdgpu_device_get_pcie_info(adev);
2563 
2564 	/* early init functions */
2565 	r = amdgpu_device_ip_early_init(adev);
2566 	if (r)
2567 		return r;
2568 
	/* doorbell bar mapping and doorbell index init */
2570 	amdgpu_device_doorbell_init(adev);
2571 
2572 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2573 	/* this will fail for cards that aren't VGA class devices, just
2574 	 * ignore it */
2575 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
2576 
2577 	if (amdgpu_device_is_px(ddev))
2578 		runtime = true;
2579 	if (!pci_is_thunderbolt_attached(adev->pdev))
2580 		vga_switcheroo_register_client(adev->pdev,
2581 					       &amdgpu_switcheroo_ops, runtime);
2582 	if (runtime)
2583 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2584 
2585 	if (amdgpu_emu_mode == 1) {
2586 		/* post the asic on emulation mode */
2587 		emu_soc_asic_init(adev);
2588 		goto fence_driver_init;
2589 	}
2590 
2591 	/* detect if we are with an SRIOV vbios */
2592 	amdgpu_device_detect_sriov_bios(adev);
2593 
2594 	/* check if we need to reset the asic
2595 	 *  E.g., driver was not cleanly unloaded previously, etc.
2596 	 */
2597 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
2598 		r = amdgpu_asic_reset(adev);
2599 		if (r) {
2600 			dev_err(adev->dev, "asic reset on init failed\n");
2601 			goto failed;
2602 		}
2603 	}
2604 
2605 	/* Post card if necessary */
2606 	if (amdgpu_device_need_post(adev)) {
2607 		if (!adev->bios) {
2608 			dev_err(adev->dev, "no vBIOS found\n");
2609 			r = -EINVAL;
2610 			goto failed;
2611 		}
2612 		DRM_INFO("GPU posting now...\n");
2613 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2614 		if (r) {
2615 			dev_err(adev->dev, "gpu post error!\n");
2616 			goto failed;
2617 		}
2618 	}
2619 
2620 	if (adev->is_atom_fw) {
2621 		/* Initialize clocks */
2622 		r = amdgpu_atomfirmware_get_clock_info(adev);
2623 		if (r) {
2624 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
2625 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2626 			goto failed;
2627 		}
2628 	} else {
2629 		/* Initialize clocks */
2630 		r = amdgpu_atombios_get_clock_info(adev);
2631 		if (r) {
2632 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2633 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2634 			goto failed;
2635 		}
2636 		/* init i2c buses */
2637 		if (!amdgpu_device_has_dc_support(adev))
2638 			amdgpu_atombios_i2c_init(adev);
2639 	}
2640 
2641 fence_driver_init:
2642 	/* Fence driver */
2643 	r = amdgpu_fence_driver_init(adev);
2644 	if (r) {
2645 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2646 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2647 		goto failed;
2648 	}
2649 
2650 	/* init the mode config */
2651 	drm_mode_config_init(adev->ddev);
2652 
2653 	r = amdgpu_device_ip_init(adev);
2654 	if (r) {
2655 		/* failed in exclusive mode due to timeout */
2656 		if (amdgpu_sriov_vf(adev) &&
2657 		    !amdgpu_sriov_runtime(adev) &&
2658 		    amdgpu_virt_mmio_blocked(adev) &&
2659 		    !amdgpu_virt_wait_reset(adev)) {
2660 			dev_err(adev->dev, "VF exclusive mode timeout\n");
2661 			/* Don't send request since VF is inactive. */
2662 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2663 			adev->virt.ops = NULL;
2664 			r = -EAGAIN;
2665 			goto failed;
2666 		}
2667 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
2668 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2669 		if (amdgpu_virt_request_full_gpu(adev, false))
2670 			amdgpu_virt_release_full_gpu(adev, false);
2671 		goto failed;
2672 	}
2673 
2674 	adev->accel_working = true;
2675 
2676 	amdgpu_vm_check_compute_bug(adev);
2677 
2678 	/* Initialize the buffer migration limit. */
2679 	if (amdgpu_moverate >= 0)
2680 		max_MBps = amdgpu_moverate;
2681 	else
2682 		max_MBps = 8; /* Allow 8 MB/s. */
2683 	/* Get a log2 for easy divisions. */
2684 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2685 
2686 	amdgpu_fbdev_init(adev);
2687 
2688 	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2689 		amdgpu_pm_virt_sysfs_init(adev);
2690 
2691 	r = amdgpu_pm_sysfs_init(adev);
2692 	if (r)
2693 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2694 
2695 	r = amdgpu_ucode_sysfs_init(adev);
2696 	if (r)
2697 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2698 
2699 	r = amdgpu_debugfs_gem_init(adev);
2700 	if (r)
2701 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
2702 
2703 	r = amdgpu_debugfs_regs_init(adev);
2704 	if (r)
2705 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
2706 
2707 	r = amdgpu_debugfs_firmware_init(adev);
2708 	if (r)
2709 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
2710 
2711 	r = amdgpu_debugfs_init(adev);
2712 	if (r)
2713 		DRM_ERROR("Creating debugfs files failed (%d).\n", r);
2714 
	if (amdgpu_testing & 1) {
2716 		if (adev->accel_working)
2717 			amdgpu_test_moves(adev);
2718 		else
2719 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2720 	}
2721 	if (amdgpu_benchmarking) {
2722 		if (adev->accel_working)
2723 			amdgpu_benchmark(adev, amdgpu_benchmarking);
2724 		else
2725 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2726 	}
2727 
2728 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
2729 	 * explicit gating rather than handling it automatically.
2730 	 */
2731 	r = amdgpu_device_ip_late_init(adev);
2732 	if (r) {
2733 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
2734 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2735 		goto failed;
2736 	}
2737 
2738 	/* must succeed. */
2739 	amdgpu_ras_resume(adev);
2740 
2741 	queue_delayed_work(system_wq, &adev->delayed_init_work,
2742 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
2743 
2744 	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2745 	if (r) {
		dev_err(adev->dev, "Could not create pcie_replay_count\n");
2747 		return r;
2748 	}
2749 
2750 	r = amdgpu_pmu_init(adev);
2751 	if (r)
2752 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2753 
2754 	return 0;
2755 
2756 failed:
2757 	amdgpu_vf_error_trans_all(adev);
2758 	if (runtime)
2759 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
2760 
2761 	return r;
2762 }
2763 
2764 /**
2765  * amdgpu_device_fini - tear down the driver
2766  *
2767  * @adev: amdgpu_device pointer
2768  *
2769  * Tear down the driver info (all asics).
2770  * Called at driver shutdown.
2771  */
2772 void amdgpu_device_fini(struct amdgpu_device *adev)
2773 {
2774 	int r;
2775 
2776 	DRM_INFO("amdgpu: finishing device.\n");
2777 	adev->shutdown = true;
2778 	/* disable all interrupts */
2779 	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
2781 		if (!amdgpu_device_has_dc_support(adev))
2782 			drm_helper_force_disable_all(adev->ddev);
2783 		else
2784 			drm_atomic_helper_shutdown(adev->ddev);
2785 	}
2786 	amdgpu_fence_driver_fini(adev);
2787 	amdgpu_pm_sysfs_fini(adev);
2788 	amdgpu_fbdev_fini(adev);
2789 	r = amdgpu_device_ip_fini(adev);
2790 	if (adev->firmware.gpu_info_fw) {
2791 		release_firmware(adev->firmware.gpu_info_fw);
2792 		adev->firmware.gpu_info_fw = NULL;
2793 	}
2794 	adev->accel_working = false;
2795 	cancel_delayed_work_sync(&adev->delayed_init_work);
2796 	/* free i2c buses */
2797 	if (!amdgpu_device_has_dc_support(adev))
2798 		amdgpu_i2c_fini(adev);
2799 
2800 	if (amdgpu_emu_mode != 1)
2801 		amdgpu_atombios_fini(adev);
2802 
2803 	kfree(adev->bios);
2804 	adev->bios = NULL;
2805 	if (!pci_is_thunderbolt_attached(adev->pdev))
2806 		vga_switcheroo_unregister_client(adev->pdev);
2807 	if (adev->flags & AMD_IS_PX)
2808 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
2809 	vga_client_register(adev->pdev, NULL, NULL, NULL);
2810 	if (adev->rio_mem)
2811 		pci_iounmap(adev->pdev, adev->rio_mem);
2812 	adev->rio_mem = NULL;
2813 	iounmap(adev->rmmio);
2814 	adev->rmmio = NULL;
2815 	amdgpu_device_doorbell_fini(adev);
2816 	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2817 		amdgpu_pm_virt_sysfs_fini(adev);
2818 
2819 	amdgpu_debugfs_regs_cleanup(adev);
2820 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
2821 	amdgpu_ucode_sysfs_fini(adev);
2822 	amdgpu_pmu_fini(adev);
2823 }
2824 
2825 
2826 /*
2827  * Suspend & resume.
2828  */
2829 /**
2830  * amdgpu_device_suspend - initiate device suspend
2831  *
2832  * @dev: drm dev pointer
2833  * @suspend: suspend state
 * @fbcon: notify the fbdev of suspend
2835  *
2836  * Puts the hw in the suspend state (all asics).
2837  * Returns 0 for success or an error on failure.
2838  * Called at driver suspend.
2839  */
2840 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2841 {
2842 	struct amdgpu_device *adev;
2843 	struct drm_crtc *crtc;
2844 	struct drm_connector *connector;
2845 	int r;
2846 
2847 	if (dev == NULL || dev->dev_private == NULL) {
2848 		return -ENODEV;
2849 	}
2850 
2851 	adev = dev->dev_private;
2852 
2853 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2854 		return 0;
2855 
2856 	adev->in_suspend = true;
2857 	drm_kms_helper_poll_disable(dev);
2858 
2859 	if (fbcon)
2860 		amdgpu_fbdev_set_suspend(adev, 1);
2861 
2862 	cancel_delayed_work_sync(&adev->delayed_init_work);
2863 
2864 	if (!amdgpu_device_has_dc_support(adev)) {
2865 		/* turn off display hw */
2866 		drm_modeset_lock_all(dev);
2867 		list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2868 			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2869 		}
2870 		drm_modeset_unlock_all(dev);
		/* unpin the front buffers and cursors */
2872 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2873 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2874 			struct drm_framebuffer *fb = crtc->primary->fb;
2875 			struct amdgpu_bo *robj;
2876 
2877 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
2878 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2879 				r = amdgpu_bo_reserve(aobj, true);
2880 				if (r == 0) {
2881 					amdgpu_bo_unpin(aobj);
2882 					amdgpu_bo_unreserve(aobj);
2883 				}
2884 			}
2885 
2886 			if (fb == NULL || fb->obj[0] == NULL) {
2887 				continue;
2888 			}
2889 			robj = gem_to_amdgpu_bo(fb->obj[0]);
2890 			/* don't unpin kernel fb objects */
2891 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2892 				r = amdgpu_bo_reserve(robj, true);
2893 				if (r == 0) {
2894 					amdgpu_bo_unpin(robj);
2895 					amdgpu_bo_unreserve(robj);
2896 				}
2897 			}
2898 		}
2899 	}
2900 
2901 	amdgpu_amdkfd_suspend(adev);
2902 
2903 	amdgpu_ras_suspend(adev);
2904 
2905 	r = amdgpu_device_ip_suspend_phase1(adev);
2906 
2907 	/* evict vram memory */
2908 	amdgpu_bo_evict_vram(adev);
2909 
2910 	amdgpu_fence_driver_suspend(adev);
2911 
2912 	r = amdgpu_device_ip_suspend_phase2(adev);
2913 
2914 	/* evict remaining vram memory
2915 	 * This second call to evict vram is to evict the gart page table
2916 	 * using the CPU.
2917 	 */
2918 	amdgpu_bo_evict_vram(adev);
2919 
2920 	pci_save_state(dev->pdev);
2921 	if (suspend) {
2922 		/* Shut down the device */
2923 		pci_disable_device(dev->pdev);
2924 		pci_set_power_state(dev->pdev, PCI_D3hot);
2925 	} else {
2926 		r = amdgpu_asic_reset(adev);
2927 		if (r)
2928 			DRM_ERROR("amdgpu asic reset failed\n");
2929 	}
2930 
2931 	return 0;
2932 }
2933 
2934 /**
2935  * amdgpu_device_resume - initiate device resume
2936  *
2937  * @dev: drm dev pointer
2938  * @resume: resume state
 * @fbcon: notify the fbdev of resume
2940  *
2941  * Bring the hw back to operating state (all asics).
2942  * Returns 0 for success or an error on failure.
2943  * Called at driver resume.
2944  */
2945 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
2946 {
2947 	struct drm_connector *connector;
2948 	struct amdgpu_device *adev = dev->dev_private;
2949 	struct drm_crtc *crtc;
2950 	int r = 0;
2951 
2952 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2953 		return 0;
2954 
2955 	if (resume) {
2956 		pci_set_power_state(dev->pdev, PCI_D0);
2957 		pci_restore_state(dev->pdev);
2958 		r = pci_enable_device(dev->pdev);
2959 		if (r)
2960 			return r;
2961 	}
2962 
2963 	/* post card */
2964 	if (amdgpu_device_need_post(adev)) {
2965 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2966 		if (r)
2967 			DRM_ERROR("amdgpu asic init failed\n");
2968 	}
2969 
2970 	r = amdgpu_device_ip_resume(adev);
2971 	if (r) {
2972 		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
2973 		return r;
2974 	}
2975 	amdgpu_fence_driver_resume(adev);
2976 
2977 
2978 	r = amdgpu_device_ip_late_init(adev);
2979 	if (r)
2980 		return r;
2981 
2982 	queue_delayed_work(system_wq, &adev->delayed_init_work,
2983 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
2984 
2985 	if (!amdgpu_device_has_dc_support(adev)) {
2986 		/* pin cursors */
2987 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2988 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2989 
2990 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
2991 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2992 				r = amdgpu_bo_reserve(aobj, true);
2993 				if (r == 0) {
2994 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2995 					if (r != 0)
2996 						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2997 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2998 					amdgpu_bo_unreserve(aobj);
2999 				}
3000 			}
3001 		}
3002 	}
3003 	r = amdgpu_amdkfd_resume(adev);
3004 	if (r)
3005 		return r;
3006 
3007 	/* Make sure IB tests flushed */
3008 	flush_delayed_work(&adev->delayed_init_work);
3009 
3010 	/* blat the mode back in */
3011 	if (fbcon) {
3012 		if (!amdgpu_device_has_dc_support(adev)) {
3013 			/* pre DCE11 */
3014 			drm_helper_resume_force_mode(dev);
3015 
3016 			/* turn on display hw */
3017 			drm_modeset_lock_all(dev);
3018 			list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3019 				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3020 			}
3021 			drm_modeset_unlock_all(dev);
3022 		}
3023 		amdgpu_fbdev_set_suspend(adev, 0);
3024 	}
3025 
3026 	drm_kms_helper_poll_enable(dev);
3027 
3028 	amdgpu_ras_resume(adev);
3029 
3030 	/*
3031 	 * Most of the connector probing functions try to acquire runtime pm
3032 	 * refs to ensure that the GPU is powered on when connector polling is
3033 	 * performed. Since we're calling this from a runtime PM callback,
3034 	 * trying to acquire rpm refs will cause us to deadlock.
3035 	 *
3036 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3037 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3038 	 */
3039 #ifdef CONFIG_PM
3040 	dev->dev->power.disable_depth++;
3041 #endif
3042 	if (!amdgpu_device_has_dc_support(adev))
3043 		drm_helper_hpd_irq_event(dev);
3044 	else
3045 		drm_kms_helper_hotplug_event(dev);
3046 #ifdef CONFIG_PM
3047 	dev->dev->power.disable_depth--;
3048 #endif
3049 	adev->in_suspend = false;
3050 
3051 	return 0;
3052 }
3053 
3054 /**
3055  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3056  *
3057  * @adev: amdgpu_device pointer
3058  *
3059  * The list of all the hardware IPs that make up the asic is walked and
3060  * the check_soft_reset callbacks are run.  check_soft_reset determines
3061  * if the asic is still hung or not.
3062  * Returns true if any of the IPs are still in a hung state, false if not.
3063  */
3064 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3065 {
3066 	int i;
3067 	bool asic_hang = false;
3068 
3069 	if (amdgpu_sriov_vf(adev))
3070 		return true;
3071 
3072 	if (amdgpu_asic_need_full_reset(adev))
3073 		return true;
3074 
3075 	for (i = 0; i < adev->num_ip_blocks; i++) {
3076 		if (!adev->ip_blocks[i].status.valid)
3077 			continue;
3078 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3079 			adev->ip_blocks[i].status.hang =
3080 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3081 		if (adev->ip_blocks[i].status.hang) {
3082 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3083 			asic_hang = true;
3084 		}
3085 	}
3086 	return asic_hang;
3087 }
3088 
3089 /**
3090  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3091  *
3092  * @adev: amdgpu_device pointer
3093  *
3094  * The list of all the hardware IPs that make up the asic is walked and the
3095  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3096  * handles any IP specific hardware or software state changes that are
3097  * necessary for a soft reset to succeed.
3098  * Returns 0 on success, negative error code on failure.
3099  */
3100 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3101 {
3102 	int i, r = 0;
3103 
3104 	for (i = 0; i < adev->num_ip_blocks; i++) {
3105 		if (!adev->ip_blocks[i].status.valid)
3106 			continue;
3107 		if (adev->ip_blocks[i].status.hang &&
3108 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3109 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3110 			if (r)
3111 				return r;
3112 		}
3113 	}
3114 
3115 	return 0;
3116 }
3117 
3118 /**
3119  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3120  *
3121  * @adev: amdgpu_device pointer
3122  *
3123  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3124  * reset is necessary to recover.
3125  * Returns true if a full asic reset is required, false if not.
3126  */
3127 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3128 {
3129 	int i;
3130 
3131 	if (amdgpu_asic_need_full_reset(adev))
3132 		return true;
3133 
3134 	for (i = 0; i < adev->num_ip_blocks; i++) {
3135 		if (!adev->ip_blocks[i].status.valid)
3136 			continue;
3137 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3138 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3139 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3140 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3141 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3142 			if (adev->ip_blocks[i].status.hang) {
				DRM_INFO("Some blocks need full reset!\n");
3144 				return true;
3145 			}
3146 		}
3147 	}
3148 	return false;
3149 }
3150 
3151 /**
3152  * amdgpu_device_ip_soft_reset - do a soft reset
3153  *
3154  * @adev: amdgpu_device pointer
3155  *
3156  * The list of all the hardware IPs that make up the asic is walked and the
3157  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3158  * IP specific hardware or software state changes that are necessary to soft
3159  * reset the IP.
3160  * Returns 0 on success, negative error code on failure.
3161  */
3162 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3163 {
3164 	int i, r = 0;
3165 
3166 	for (i = 0; i < adev->num_ip_blocks; i++) {
3167 		if (!adev->ip_blocks[i].status.valid)
3168 			continue;
3169 		if (adev->ip_blocks[i].status.hang &&
3170 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3171 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3172 			if (r)
3173 				return r;
3174 		}
3175 	}
3176 
3177 	return 0;
3178 }
3179 
3180 /**
3181  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3182  *
3183  * @adev: amdgpu_device pointer
3184  *
3185  * The list of all the hardware IPs that make up the asic is walked and the
3186  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3187  * handles any IP specific hardware or software state changes that are
3188  * necessary after the IP has been soft reset.
3189  * Returns 0 on success, negative error code on failure.
3190  */
3191 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3192 {
3193 	int i, r = 0;
3194 
3195 	for (i = 0; i < adev->num_ip_blocks; i++) {
3196 		if (!adev->ip_blocks[i].status.valid)
3197 			continue;
3198 		if (adev->ip_blocks[i].status.hang &&
3199 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3200 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3201 		if (r)
3202 			return r;
3203 	}
3204 
3205 	return 0;
3206 }
3207 
3208 /**
3209  * amdgpu_device_recover_vram - Recover some VRAM contents
3210  *
3211  * @adev: amdgpu_device pointer
3212  *
3213  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3214  * restore things like GPUVM page tables after a GPU reset where
3215  * the contents of VRAM might be lost.
3216  *
3217  * Returns:
3218  * 0 on success, negative error code on failure.
3219  */
3220 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3221 {
3222 	struct dma_fence *fence = NULL, *next = NULL;
3223 	struct amdgpu_bo *shadow;
3224 	long r = 1, tmo;
3225 
3226 	if (amdgpu_sriov_runtime(adev))
3227 		tmo = msecs_to_jiffies(8000);
3228 	else
3229 		tmo = msecs_to_jiffies(100);
3230 
3231 	DRM_INFO("recover vram bo from shadow start\n");
3232 	mutex_lock(&adev->shadow_list_lock);
3233 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3234 
3235 		/* No need to recover an evicted BO */
3236 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3237 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3238 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3239 			continue;
3240 
3241 		r = amdgpu_bo_restore_shadow(shadow, &next);
3242 		if (r)
3243 			break;
3244 
3245 		if (fence) {
3246 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3247 			dma_fence_put(fence);
3248 			fence = next;
3249 			if (tmo == 0) {
3250 				r = -ETIMEDOUT;
3251 				break;
3252 			} else if (tmo < 0) {
3253 				r = tmo;
3254 				break;
3255 			}
3256 		} else {
3257 			fence = next;
3258 		}
3259 	}
3260 	mutex_unlock(&adev->shadow_list_lock);
3261 
3262 	if (fence)
3263 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3264 	dma_fence_put(fence);
3265 
3266 	if (r < 0 || tmo <= 0) {
3267 		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3268 		return -EIO;
3269 	}
3270 
3271 	DRM_INFO("recover vram bo from shadow done\n");
3272 	return 0;
3273 }
3274 
3275 
3276 /**
3277  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3278  *
3279  * @adev: amdgpu device pointer
3280  * @from_hypervisor: request from hypervisor
3281  *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
3284  */
3285 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3286 				     bool from_hypervisor)
3287 {
3288 	int r;
3289 
3290 	if (from_hypervisor)
3291 		r = amdgpu_virt_request_full_gpu(adev, true);
3292 	else
3293 		r = amdgpu_virt_reset_gpu(adev);
3294 	if (r)
3295 		return r;
3296 
3297 	amdgpu_amdkfd_pre_reset(adev);
3298 
3299 	/* Resume IP prior to SMC */
3300 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3301 	if (r)
3302 		goto error;
3303 
	/* we need to recover the GART prior to running SMC/CP/SDMA resume */
3305 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3306 
3307 	r = amdgpu_device_fw_loading(adev);
3308 	if (r)
3309 		return r;
3310 
3311 	/* now we are okay to resume SMC/CP/SDMA */
3312 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3313 	if (r)
3314 		goto error;
3315 
3316 	amdgpu_irq_gpu_reset_resume_helper(adev);
3317 	r = amdgpu_ib_ring_tests(adev);
3318 	amdgpu_amdkfd_post_reset(adev);
3319 
3320 error:
3321 	amdgpu_virt_init_data_exchange(adev);
3322 	amdgpu_virt_release_full_gpu(adev, true);
3323 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3324 		atomic_inc(&adev->vram_lost_counter);
3325 		r = amdgpu_device_recover_vram(adev);
3326 	}
3327 
3328 	return r;
3329 }
3330 
3331 /**
3332  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3333  *
3334  * @adev: amdgpu device pointer
3335  *
3336  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3337  * a hung GPU.
3338  */
3339 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3340 {
3341 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3342 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3343 		return false;
3344 	}
3345 
3346 	if (amdgpu_gpu_recovery == 0)
3347 		goto disabled;
3348 
3349 	if (amdgpu_sriov_vf(adev))
3350 		return true;
3351 
3352 	if (amdgpu_gpu_recovery == -1) {
3353 		switch (adev->asic_type) {
3354 		case CHIP_BONAIRE:
3355 		case CHIP_HAWAII:
3356 		case CHIP_TOPAZ:
3357 		case CHIP_TONGA:
3358 		case CHIP_FIJI:
3359 		case CHIP_POLARIS10:
3360 		case CHIP_POLARIS11:
3361 		case CHIP_POLARIS12:
3362 		case CHIP_VEGAM:
3363 		case CHIP_VEGA20:
3364 		case CHIP_VEGA10:
3365 		case CHIP_VEGA12:
3366 			break;
3367 		default:
3368 			goto disabled;
3369 		}
3370 	}
3371 
3372 	return true;
3373 
3374 disabled:
3375 		DRM_INFO("GPU recovery disabled.\n");
3376 		return false;
3377 }
3378 
3379 
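/**
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @job: the job which triggered the hang, or NULL
 * @need_full_reset_arg: whether a full ASIC reset is required (updated on return)
 *
 * Force-completes the hardware fences on all rings, increases the karma of
 * the guilty job and, on bare metal, attempts a soft reset first.  If the
 * soft reset fails or is not possible, the IP blocks are suspended in
 * preparation for a full ASIC reset.
 * Returns 0 on success, negative error code on failure.
 */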
3380 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3381 					struct amdgpu_job *job,
3382 					bool *need_full_reset_arg)
3383 {
3384 	int i, r = 0;
3385 	bool need_full_reset  = *need_full_reset_arg;
3386 
3387 	/* block all schedulers and reset given job's ring */
3388 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3389 		struct amdgpu_ring *ring = adev->rings[i];
3390 
3391 		if (!ring || !ring->sched.thread)
3392 			continue;
3393 
3394 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3395 		amdgpu_fence_driver_force_completion(ring);
3396 	}
3397 
	if (job)
3399 		drm_sched_increase_karma(&job->base);
3400 
3401 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3402 	if (!amdgpu_sriov_vf(adev)) {
3403 
3404 		if (!need_full_reset)
3405 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3406 
3407 		if (!need_full_reset) {
3408 			amdgpu_device_ip_pre_soft_reset(adev);
3409 			r = amdgpu_device_ip_soft_reset(adev);
3410 			amdgpu_device_ip_post_soft_reset(adev);
3411 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3412 				DRM_INFO("soft reset failed, will fallback to full reset!\n");
3413 				need_full_reset = true;
3414 			}
3415 		}
3416 
3417 		if (need_full_reset)
3418 			r = amdgpu_device_ip_suspend(adev);
3419 
3420 		*need_full_reset_arg = need_full_reset;
3421 	}
3422 
3423 	return r;
3424 }
3425 
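/**
 * amdgpu_do_asic_reset - perform the ASIC reset and resume for a list of devices
 *
 * @hive: XGMI hive the devices belong to, or NULL
 * @device_list_handle: list of devices to reset
 * @need_full_reset_arg: whether a full ASIC reset is required (updated on return)
 *
 * If a full reset is needed, resets every device in the list (in parallel for
 * XGMI hives), then re-posts the cards, resumes the hardware IPs, recovers
 * the GTT, reloads firmware and runs the IB ring tests on each device.
 * Returns 0 on success, negative error code on failure.
 */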
3426 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3427 			       struct list_head *device_list_handle,
3428 			       bool *need_full_reset_arg)
3429 {
3430 	struct amdgpu_device *tmp_adev = NULL;
3431 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3432 	int r = 0;
3433 
3434 	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper link negotiation in FW (within 1 sec)
3437 	 */
3438 	if (need_full_reset) {
3439 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3440 			/* For XGMI run all resets in parallel to speed up the process */
3441 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3442 				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3443 					r = -EALREADY;
3444 			} else
3445 				r = amdgpu_asic_reset(tmp_adev);
3446 
3447 			if (r) {
3448 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3449 					 r, tmp_adev->ddev->unique);
3450 				break;
3451 			}
3452 		}
3453 
		/* For XGMI wait for all PSP resets to complete before proceeding */
3455 		if (!r) {
3456 			list_for_each_entry(tmp_adev, device_list_handle,
3457 					    gmc.xgmi.head) {
3458 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3459 					flush_work(&tmp_adev->xgmi_reset_work);
3460 					r = tmp_adev->asic_reset_res;
3461 					if (r)
3462 						break;
3463 				}
3464 			}
3465 
3466 			list_for_each_entry(tmp_adev, device_list_handle,
3467 					gmc.xgmi.head) {
3468 				amdgpu_ras_reserve_bad_pages(tmp_adev);
3469 			}
3470 		}
3471 	}
3472 
3473 
3474 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3475 		if (need_full_reset) {
3476 			/* post card */
3477 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3478 				DRM_WARN("asic atom init failed!");
3479 
3480 			if (!r) {
3481 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3482 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
3483 				if (r)
3484 					goto out;
3485 
3486 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3487 				if (vram_lost) {
3488 					DRM_INFO("VRAM is lost due to GPU reset!\n");
3489 					atomic_inc(&tmp_adev->vram_lost_counter);
3490 				}
3491 
3492 				r = amdgpu_gtt_mgr_recover(
3493 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
3494 				if (r)
3495 					goto out;
3496 
3497 				r = amdgpu_device_fw_loading(tmp_adev);
3498 				if (r)
3499 					return r;
3500 
3501 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
3502 				if (r)
3503 					goto out;
3504 
3505 				if (vram_lost)
3506 					amdgpu_device_fill_reset_magic(tmp_adev);
3507 
3508 				r = amdgpu_device_ip_late_init(tmp_adev);
3509 				if (r)
3510 					goto out;
3511 
3512 				/* must succeed. */
3513 				amdgpu_ras_resume(tmp_adev);
3514 
3515 				/* Update PSP FW topology after reset */
3516 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3517 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3518 			}
3519 		}
3520 
3521 
3522 out:
3523 		if (!r) {
3524 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3525 			r = amdgpu_ib_ring_tests(tmp_adev);
3526 			if (r) {
3527 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3528 				r = amdgpu_device_ip_suspend(tmp_adev);
3529 				need_full_reset = true;
3530 				r = -EAGAIN;
3531 				goto end;
3532 			}
3533 		}
3534 
3535 		if (!r)
3536 			r = amdgpu_device_recover_vram(tmp_adev);
3537 		else
3538 			tmp_adev->asic_reset_res = r;
3539 	}
3540 
3541 end:
3542 	*need_full_reset_arg = need_full_reset;
3543 	return r;
3544 }
3545 
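/**
 * amdgpu_device_lock_adev - take the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 * @trylock: use mutex_trylock instead of blocking
 *
 * Takes adev->lock_reset, marks the device as being in GPU reset and blocks
 * KFD (SR-IOV handles KFD separately).
 * Returns true if the lock was acquired, false otherwise.
 */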
3546 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
3547 {
3548 	if (trylock) {
3549 		if (!mutex_trylock(&adev->lock_reset))
3550 			return false;
3551 	} else
3552 		mutex_lock(&adev->lock_reset);
3553 
3554 	atomic_inc(&adev->gpu_reset_counter);
3555 	adev->in_gpu_reset = 1;
3556 	/* Block kfd: SRIOV would do it separately */
3557 	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);
3559 
3560 	return true;
3561 }
3562 
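/**
 * amdgpu_device_unlock_adev - release the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 *
 * Unblocks KFD (SR-IOV handles KFD separately), flushes any pending VF error
 * messages, clears the in-reset flag and drops adev->lock_reset.
 */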
3563 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3564 {
	/* unlock kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_post_reset(adev);
3568 	amdgpu_vf_error_trans_all(adev);
3569 	adev->in_gpu_reset = 0;
3570 	mutex_unlock(&adev->lock_reset);
3571 }
3572 
3573 
3574 /**
3575  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3576  *
3577  * @adev: amdgpu device pointer
 * @job: which job triggered the hang
3579  *
3580  * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize the ASIC
3582  * Returns 0 for success or an error on failure.
3583  */
3584 
3585 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3586 			      struct amdgpu_job *job)
3587 {
3588 	struct list_head device_list, *device_list_handle =  NULL;
3589 	bool need_full_reset, job_signaled;
3590 	struct amdgpu_hive_info *hive = NULL;
3591 	struct amdgpu_device *tmp_adev = NULL;
3592 	int i, r = 0;
3593 
3594 	need_full_reset = job_signaled = false;
3595 	INIT_LIST_HEAD(&device_list);
3596 
3597 	dev_info(adev->dev, "GPU reset begin!\n");
3598 
3599 	cancel_delayed_work_sync(&adev->delayed_init_work);
3600 
3601 	hive = amdgpu_get_xgmi_hive(adev, false);
3602 
	/*
	 * Here we trylock to avoid a chain of resets executing, triggered
	 * either by jobs on different adevs in an XGMI hive or by jobs on
	 * different schedulers for the same device, while this TO handler
	 * is running. We always reset all schedulers for a device and all
	 * devices in an XGMI hive, so that should take care of them too.
	 */
3610 
3611 	if (hive && !mutex_trylock(&hive->reset_lock)) {
3612 		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3613 			 job->base.id, hive->hive_id);
3614 		return 0;
3615 	}
3616 
	/* Start with the adev's pre ASIC reset first, for the soft reset check. */
3618 	if (!amdgpu_device_lock_adev(adev, !hive)) {
3619 		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3620 					 job->base.id);
3621 		return 0;
3622 	}
3623 
3624 	/* Build list of devices to reset */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3626 		if (!hive) {
3627 			amdgpu_device_unlock_adev(adev);
3628 			return -ENODEV;
3629 		}
3630 
		/*
		 * If we are in XGMI hive mode, the device reset is done for
		 * all nodes in the hive to retrain all XGMI links, so the
		 * reset sequence is executed in a loop over all nodes.
		 */
3636 		device_list_handle = &hive->device_list;
3637 	} else {
3638 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
3639 		device_list_handle = &device_list;
3640 	}
3641 
3642 	/* block all schedulers and reset given job's ring */
3643 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3644 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3645 			struct amdgpu_ring *ring = tmp_adev->rings[i];
3646 
3647 			if (!ring || !ring->sched.thread)
3648 				continue;
3649 
3650 			drm_sched_stop(&ring->sched, &job->base);
3651 		}
3652 	}
3653 
3654 
3655 	/*
	 * The guilty signal must be checked here because, after this point,
	 * all old HW fences are force signaled.
	 *
	 * job->base holds a reference to the parent fence.
3660 	 */
3661 	if (job && job->base.s_fence->parent &&
3662 	    dma_fence_is_signaled(job->base.s_fence->parent))
3663 		job_signaled = true;
3664 
3665 	if (!amdgpu_device_ip_need_full_reset(adev))
3666 		device_list_handle = &device_list;
3667 
3668 	if (job_signaled) {
3669 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3670 		goto skip_hw_reset;
3671 	}
3672 
3673 
	/* The guilty job will be freed after this */
3675 	r = amdgpu_device_pre_asic_reset(adev,
3676 					 job,
3677 					 &need_full_reset);
3678 	if (r) {
		/* TODO: should we stop here? */
		DRM_ERROR("GPU pre asic reset failed with err %d for drm dev %s",
			  r, adev->ddev->unique);
3682 		adev->asic_reset_res = r;
3683 	}
3684 
3685 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
3686 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3687 
3688 		if (tmp_adev == adev)
3689 			continue;
3690 
3691 		amdgpu_device_lock_adev(tmp_adev, false);
3692 		r = amdgpu_device_pre_asic_reset(tmp_adev,
3693 						 NULL,
3694 						 &need_full_reset);
		/* TODO: should we stop here? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err %d for drm dev %s",
				  r, tmp_adev->ddev->unique);
3699 			tmp_adev->asic_reset_res = r;
3700 		}
3701 	}
3702 
	/* Actual ASIC resets if needed. */
3704 	/* TODO Implement XGMI hive reset logic for SRIOV */
3705 	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, !job);
3707 		if (r)
3708 			adev->asic_reset_res = r;
3709 	} else {
		r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r == -EAGAIN)
			goto retry;
3713 	}
3714 
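/*
 * The HW reset is skipped when the guilty job has already signaled; in
 * that case the schedulers below are simply restarted without
 * resubmitting anything.
 */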
3715 skip_hw_reset:
3716 
	/* Post ASIC reset for all devs. */
3718 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3719 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3720 			struct amdgpu_ring *ring = tmp_adev->rings[i];
3721 
3722 			if (!ring || !ring->sched.thread)
3723 				continue;
3724 
			/* No point in resubmitting jobs if we didn't do a HW reset */
3726 			if (!tmp_adev->asic_reset_res && !job_signaled)
3727 				drm_sched_resubmit_jobs(&ring->sched);
3728 
3729 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3730 		}
3731 
		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled)
			drm_helper_resume_force_mode(tmp_adev->ddev);
3735 
3736 		tmp_adev->asic_reset_res = 0;
3737 
3738 		if (r) {
			/* bad news, how do we tell it to userspace? */
3740 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3741 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3742 		} else {
3743 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3744 		}
3745 
3746 		amdgpu_device_unlock_adev(tmp_adev);
3747 	}
3748 
3749 	if (hive)
3750 		mutex_unlock(&hive->reset_lock);
3751 
3752 	if (r)
3753 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3754 	return r;
3755 }
3756 
3757 /**
 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
3759  *
3760  * @adev: amdgpu_device pointer
3761  *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
3763  * and lanes) of the slot the device is in. Handles APUs and
3764  * virtualized environments where PCIE config space may not be available.
3765  */
3766 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3767 {
3768 	struct pci_dev *pdev;
3769 	enum pci_bus_speed speed_cap, platform_speed_cap;
3770 	enum pcie_link_width platform_link_width;
3771 
3772 	if (amdgpu_pcie_gen_cap)
3773 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3774 
3775 	if (amdgpu_pcie_lane_cap)
3776 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3777 
3778 	/* covers APUs as well */
3779 	if (pci_is_root_bus(adev->pdev->bus)) {
3780 		if (adev->pm.pcie_gen_mask == 0)
3781 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3782 		if (adev->pm.pcie_mlw_mask == 0)
3783 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3784 		return;
3785 	}
3786 
3787 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3788 		return;
3789 
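	/*
	 * Determine what the platform side can actually sustain:
	 * pcie_bandwidth_available() reports the speed and width of the
	 * limiting link on the path from the device up to the root complex.
	 */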
3790 	pcie_bandwidth_available(adev->pdev, NULL,
3791 				 &platform_speed_cap, &platform_link_width);
3792 
3793 	if (adev->pm.pcie_gen_mask == 0) {
3794 		/* asic caps */
3795 		pdev = adev->pdev;
3796 		speed_cap = pcie_get_speed_cap(pdev);
3797 		if (speed_cap == PCI_SPEED_UNKNOWN) {
3798 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3799 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3800 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3801 		} else {
3802 			if (speed_cap == PCIE_SPEED_16_0GT)
3803 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3804 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3805 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3806 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3807 			else if (speed_cap == PCIE_SPEED_8_0GT)
3808 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3809 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3810 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3811 			else if (speed_cap == PCIE_SPEED_5_0GT)
3812 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3813 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3814 			else
3815 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3816 		}
3817 		/* platform caps */
3818 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
3819 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3820 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3821 		} else {
3822 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
3823 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3824 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3825 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3826 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
3827 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
3828 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3829 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3830 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
3831 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
3832 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3833 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3834 			else
3835 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3837 		}
3838 	}
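	/*
	 * The link-width mask advertises every width up to and including the
	 * platform maximum, e.g. a x8-capable slot also reports x4/x2/x1.
	 */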
3839 	if (adev->pm.pcie_mlw_mask == 0) {
3840 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3841 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3842 		} else {
3843 			switch (platform_link_width) {
3844 			case PCIE_LNK_X32:
3845 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3846 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3847 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3848 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3849 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3850 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3851 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3852 				break;
3853 			case PCIE_LNK_X16:
3854 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3855 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3856 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3857 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3858 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3859 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3860 				break;
3861 			case PCIE_LNK_X12:
3862 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3863 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3864 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3865 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3866 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3867 				break;
3868 			case PCIE_LNK_X8:
3869 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3870 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3871 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3872 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3873 				break;
3874 			case PCIE_LNK_X4:
3875 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3876 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3877 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3878 				break;
3879 			case PCIE_LNK_X2:
3880 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3881 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3882 				break;
3883 			case PCIE_LNK_X1:
3884 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3885 				break;
3886 			default:
3887 				break;
3888 			}
3889 		}
3890 	}
3891 }
3892 
3893