1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 #include "amdgpu_fru_eeprom.h"
68 
69 #include <linux/suspend.h>
70 #include <drm/task_barrier.h>
71 #include <linux/pm_runtime.h>
72 
73 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
77 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
78 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
82 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
83 
84 #define AMDGPU_RESUME_MS		2000
85 
/* Human-readable ASIC names.
 * NOTE(review): entry order appears to be used as an index (presumably
 * mirrors enum amd_asic_type) — confirm before inserting or reordering
 * entries.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};
117 
118 /**
119  * DOC: pcie_replay_count
120  *
121  * The amdgpu driver provides a sysfs API for reporting the total number
122  * of PCIe replays (NAKs)
123  * The file pcie_replay_count is used for this and returns the total
124  * number of replays as a sum of the NAKs generated and NAKs received
125  */
126 
/**
 * amdgpu_device_get_pcie_replay_count - sysfs show callback for
 * pcie_replay_count
 *
 * @dev: device pointer (drvdata is the drm_device)
 * @attr: device attribute (unused)
 * @buf: output buffer, at most PAGE_SIZE bytes
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	/* ASIC-specific query; sum of NAKs generated and received (see DOC above) */
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

/* read-only sysfs attribute: pcie_replay_count */
static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);
139 
140 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
141 
142 /**
143  * DOC: product_name
144  *
145  * The amdgpu driver provides a sysfs API for reporting the product name
146  * for the device
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
149  * NOTE: This is only available for certain server cards
150  */
151 
/**
 * amdgpu_device_get_product_name - sysfs show callback for product_name
 *
 * @dev: device pointer (drvdata is the drm_device)
 * @attr: device attribute (unused)
 * @buf: output buffer, at most PAGE_SIZE bytes
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

/* read-only sysfs attribute: product_name */
static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);
163 
164 /**
165  * DOC: product_number
166  *
167  * The amdgpu driver provides a sysfs API for reporting the part number
168  * for the device
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
171  * NOTE: This is only available for certain server cards
172  */
173 
/**
 * amdgpu_device_get_product_number - sysfs show callback for product_number
 *
 * @dev: device pointer (drvdata is the drm_device)
 * @attr: device attribute (unused)
 * @buf: output buffer, at most PAGE_SIZE bytes
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

/* read-only sysfs attribute: product_number */
static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);
185 
186 /**
187  * DOC: serial_number
188  *
189  * The amdgpu driver provides a sysfs API for reporting the serial number
190  * for the device
191  * The file serial_number is used for this and returns the serial number
192  * as returned from the FRU.
193  * NOTE: This is only available for certain server cards
194  */
195 
/**
 * amdgpu_device_get_serial_number - sysfs show callback for serial_number
 *
 * @dev: device pointer (drvdata is the drm_device)
 * @attr: device attribute (unused)
 * @buf: output buffer, at most PAGE_SIZE bytes
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

/* read-only sysfs attribute: serial_number */
static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);
207 
208 /**
209  * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
210  *
211  * @dev: drm_device pointer
212  *
213  * Returns true if the device is a dGPU with HG/PX power control,
214  * otherwise return false.
215  */
216 bool amdgpu_device_supports_boco(struct drm_device *dev)
217 {
218 	struct amdgpu_device *adev = dev->dev_private;
219 
220 	if (adev->flags & AMD_IS_PX)
221 		return true;
222 	return false;
223 }
224 
225 /**
226  * amdgpu_device_supports_baco - Does the device support BACO
227  *
228  * @dev: drm_device pointer
229  *
 * Returns true if the device supports BACO,
 * otherwise return false.
232  */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	/* BACO support is ASIC specific; defer to the per-ASIC callback */
	return amdgpu_asic_supports_baco(adev);
}
239 
240 /**
241  * VRAM access helper functions.
242  *
243  * amdgpu_device_vram_access - read/write a buffer in vram
244  *
245  * @adev: amdgpu_device pointer
246  * @pos: offset of the buffer in vram
247  * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
249  * @write: true - write to vram, otherwise - read from vram
250  */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;	/* last value written to MM_INDEX_HI; ~0 forces the first write */
	uint64_t last;


#ifdef CONFIG_64BIT
	/* Fast path: copy through the CPU-visible VRAM aperture for the
	 * portion of the request that falls inside it.
	 */
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* order the CPU stores before the HDP flush */
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			/* order the HDP invalidate before the CPU loads */
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		/* fall through to the indirect path for the remainder */
		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	/* Slow path: dword-by-dword access via the MM_INDEX/MM_DATA
	 * indirect register pair, under the MMIO index lock.
	 */
	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;	/* upper address bits for MM_INDEX_HI */

		/* NOTE(review): bit 31 is always set in MM_INDEX — presumably an
		 * aperture-select bit; confirm against the register spec.
		 */
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			/* only reprogram the high index when it changes */
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
300 
301 /*
302  * device register access helper functions.
303  */
304 /**
305  * amdgpu_device_rreg - read a register
306  *
307  * @adev: amdgpu_device pointer
308  * @reg: dword aligned register offset
309  * @acc_flags: access flags which require special behavior
310  *
311  * Returns the 32 bit value from the offset specified.
312  */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	/* Under SR-IOV at runtime the read must go through the KIQ,
	 * unless the caller explicitly opted out with AMDGPU_REGS_NO_KIQ.
	 */
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	/* Direct MMIO when the register lies inside the BAR, otherwise
	 * fall back to the indirect PCIe access callback.
	 */
	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}
328 
329 /*
330  * MMIO register read with bytes helper functions
331  * @offset:bytes offset from MMIO start
332  *
333 */
334 
335 /**
336  * amdgpu_mm_rreg8 - read a memory mapped IO register
337  *
338  * @adev: amdgpu_device pointer
339  * @offset: byte aligned register offset
340  *
341  * Returns the 8 bit value from the offset specified.
342  */
343 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
344 	if (offset < adev->rmmio_size)
345 		return (readb(adev->rmmio + offset));
346 	BUG();
347 }
348 
349 /*
350  * MMIO register write with bytes helper functions
351  * @offset:bytes offset from MMIO start
352  * @value: the value want to be written to the register
353  *
354 */
355 /**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
357  *
358  * @adev: amdgpu_device pointer
359  * @offset: byte aligned register offset
360  * @value: 8 bit value to write
361  *
362  * Writes the value specified to the offset specified.
363  */
364 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
365 	if (offset < adev->rmmio_size)
366 		writeb(value, adev->rmmio + offset);
367 	else
368 		BUG();
369 }
370 
371 void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
372 					     uint32_t v, uint32_t acc_flags)
373 {
374 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
375 
376 	if ((reg * 4) < adev->rmmio_size)
377 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
378 	else
379 		adev->pcie_wreg(adev, (reg * 4), v);
380 }
381 
382 /**
383  * amdgpu_device_wreg - write to a register
384  *
385  * @adev: amdgpu_device pointer
386  * @reg: dword aligned register offset
387  * @v: 32 bit value to write to the register
388  * @acc_flags: access flags which require special behavior
389  *
390  * Writes the value specified to the offset specified.
391  */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	/* Under SR-IOV at runtime the write must go through the KIQ,
	 * unless the caller explicitly opted out with AMDGPU_REGS_NO_KIQ.
	 */
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}
400 
401 /*
402  * amdgpu_mm_wreg_mmio_rlc -  write register either with mmio or with RLC path if in range
403  *
 * this function is invoked only for debugfs register access
405  * */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	/* Under full-access SR-IOV, registers in the RLC-protected range
	 * must be written through the RLC write callback instead of MMIO.
	 */
	if (amdgpu_sriov_fullaccess(adev) &&
		adev->gfx.rlc.funcs &&
		adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	/* everything else goes out as a plain (non-KIQ) register write */
	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}
419 
420 /**
421  * amdgpu_io_rreg - read an IO register
422  *
423  * @adev: amdgpu_device pointer
424  * @reg: dword aligned register offset
425  *
426  * Returns the 32 bit value from the offset specified.
427  */
428 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
429 {
430 	if ((reg * 4) < adev->rio_mem_size)
431 		return ioread32(adev->rio_mem + (reg * 4));
432 	else {
433 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
434 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
435 	}
436 }
437 
438 /**
439  * amdgpu_io_wreg - write to an IO register
440  *
441  * @adev: amdgpu_device pointer
442  * @reg: dword aligned register offset
443  * @v: 32 bit value to write to the register
444  *
445  * Writes the value specified to the offset specified.
446  */
447 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
448 {
449 	if ((reg * 4) < adev->rio_mem_size)
450 		iowrite32(v, adev->rio_mem + (reg * 4));
451 	else {
452 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
453 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
454 	}
455 }
456 
457 /**
458  * amdgpu_mm_rdoorbell - read a doorbell dword
459  *
460  * @adev: amdgpu_device pointer
461  * @index: doorbell index
462  *
463  * Returns the value in the doorbell aperture at the
464  * requested doorbell index (CIK).
465  */
466 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
467 {
468 	if (index < adev->doorbell.num_doorbells) {
469 		return readl(adev->doorbell.ptr + index);
470 	} else {
471 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
472 		return 0;
473 	}
474 }
475 
476 /**
477  * amdgpu_mm_wdoorbell - write a doorbell dword
478  *
479  * @adev: amdgpu_device pointer
480  * @index: doorbell index
481  * @v: value to write
482  *
483  * Writes @v to the doorbell aperture at the
484  * requested doorbell index (CIK).
485  */
486 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
487 {
488 	if (index < adev->doorbell.num_doorbells) {
489 		writel(v, adev->doorbell.ptr + index);
490 	} else {
491 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
492 	}
493 }
494 
495 /**
496  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
497  *
498  * @adev: amdgpu_device pointer
499  * @index: doorbell index
500  *
501  * Returns the value in the doorbell aperture at the
502  * requested doorbell index (VEGA10+).
503  */
504 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
505 {
506 	if (index < adev->doorbell.num_doorbells) {
507 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
508 	} else {
509 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
510 		return 0;
511 	}
512 }
513 
514 /**
515  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
516  *
517  * @adev: amdgpu_device pointer
518  * @index: doorbell index
519  * @v: value to write
520  *
521  * Writes @v to the doorbell aperture at the
522  * requested doorbell index (VEGA10+).
523  */
524 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
525 {
526 	if (index < adev->doorbell.num_doorbells) {
527 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
528 	} else {
529 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
530 	}
531 }
532 
533 /**
534  * amdgpu_invalid_rreg - dummy reg read function
535  *
536  * @adev: amdgpu device pointer
537  * @reg: offset of register
538  *
539  * Dummy register read function.  Used for register blocks
540  * that certain asics don't have (all asics).
541  * Returns the value in the register.
542  */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
	return 0;
}
549 
550 /**
551  * amdgpu_invalid_wreg - dummy reg write function
552  *
553  * @adev: amdgpu device pointer
554  * @reg: offset of register
555  * @v: value to write to the register
556  *
557  * Dummy register read function.  Used for register blocks
558  * that certain asics don't have (all asics).
559  */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
}
566 
567 /**
568  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
569  *
570  * @adev: amdgpu device pointer
571  * @reg: offset of register
572  *
573  * Dummy register read function.  Used for register blocks
574  * that certain asics don't have (all asics).
575  * Returns the value in the register.
576  */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
	return 0;
}
583 
584 /**
585  * amdgpu_invalid_wreg64 - dummy reg write function
586  *
587  * @adev: amdgpu device pointer
588  * @reg: offset of register
589  * @v: value to write to the register
590  *
591  * Dummy register read function.  Used for register blocks
592  * that certain asics don't have (all asics).
593  */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
}
600 
601 /**
602  * amdgpu_block_invalid_rreg - dummy reg read function
603  *
604  * @adev: amdgpu device pointer
605  * @block: offset of instance
606  * @reg: offset of register
607  *
608  * Dummy register read function.  Used for register blocks
609  * that certain asics don't have (all asics).
610  * Returns the value in the register.
611  */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
	return 0;
}
620 
621 /**
622  * amdgpu_block_invalid_wreg - dummy reg write function
623  *
624  * @adev: amdgpu device pointer
625  * @block: offset of instance
626  * @reg: offset of register
627  * @v: value to write to the register
628  *
629  * Dummy register read function.  Used for register blocks
630  * that certain asics don't have (all asics).
631  */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	/* deliberately fatal: reaching this means an absent register
	 * block was accessed */
	BUG();
}
640 
641 /**
642  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
643  *
644  * @adev: amdgpu device pointer
645  *
646  * Allocates a scratch page of VRAM for use by various things in the
647  * driver.
648  */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	/* one GPU page of VRAM; bo, GPU address and CPU pointer are all
	 * stored in adev->vram_scratch */
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}
657 
658 /**
659  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
660  *
661  * @adev: amdgpu device pointer
662  *
663  * Frees the VRAM scratch page.
664  */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	/* NULL gpu_addr/cpu_addr: the stored values are not needed back */
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}
669 
670 /**
671  * amdgpu_device_program_register_sequence - program an array of registers.
672  *
673  * @adev: amdgpu_device pointer
674  * @registers: pointer to the register array
675  * @array_size: size of the register array
676  *
 * Programs an array of registers with the given AND and OR masks.
678  * This is a helper for setting golden registers.
679  */
680 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
681 					     const u32 *registers,
682 					     const u32 array_size)
683 {
684 	u32 tmp, reg, and_mask, or_mask;
685 	int i;
686 
687 	if (array_size % 3)
688 		return;
689 
690 	for (i = 0; i < array_size; i +=3) {
691 		reg = registers[i + 0];
692 		and_mask = registers[i + 1];
693 		or_mask = registers[i + 2];
694 
695 		if (and_mask == 0xffffffff) {
696 			tmp = or_mask;
697 		} else {
698 			tmp = RREG32(reg);
699 			tmp &= ~and_mask;
700 			if (adev->family >= AMDGPU_FAMILY_AI)
701 				tmp |= (or_mask & and_mask);
702 			else
703 				tmp |= or_mask;
704 		}
705 		WREG32(reg, tmp);
706 	}
707 }
708 
709 /**
710  * amdgpu_device_pci_config_reset - reset the GPU
711  *
712  * @adev: amdgpu_device pointer
713  *
714  * Resets the GPU using the pci config reset sequence.
715  * Only applicable to asics prior to vega10.
716  */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	/* writing the magic reset value to PCI config offset 0x7c
	 * triggers the ASIC reset */
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}
721 
722 /*
723  * GPU doorbell aperture helpers function.
724  */
725 /**
726  * amdgpu_device_doorbell_init - Init doorbell driver information.
727  *
728  * @adev: amdgpu_device pointer
729  *
730  * Init doorbell driver information (CIK)
731  * Returns 0 on success, error on failure.
732  */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	/* doorbell BAR (resource 2) must have been assigned */
	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	/* clamp to whichever is smaller: BAR capacity or the highest
	 * assigned doorbell index */
	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should + 1 page (0x400 in dword)
	 * NOTE(review): this addition happens after the min_t clamp above,
	 * so the ioremap below may exceed doorbell.size on a small BAR —
	 * confirm whether the extra page should also be bounds-checked.
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}
776 
777 /**
778  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
779  *
780  * @adev: amdgpu_device pointer
781  *
782  * Tear down doorbell driver information (CIK)
783  */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	/* release the doorbell BAR mapping and clear the stale pointer */
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}
789 
790 
791 
792 /*
793  * amdgpu_device_wb_*()
794  * Writeback is the method by which the GPU updates special pages in memory
795  * with the status of certain GPU events (fences, ring pointers,etc.).
796  */
797 
798 /**
799  * amdgpu_device_wb_fini - Disable Writeback and free memory
800  *
801  * @adev: amdgpu_device pointer
802  *
803  * Disables Writeback and frees the Writeback memory (all asics).
804  * Used at driver shutdown.
805  */
806 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
807 {
808 	if (adev->wb.wb_obj) {
809 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
810 				      &adev->wb.gpu_addr,
811 				      (void **)&adev->wb.wb);
812 		adev->wb.wb_obj = NULL;
813 	}
814 }
815 
816 /**
817  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
818  *
819  * @adev: amdgpu_device pointer
820  *
821  * Initializes writeback and allocates writeback memory (all asics).
822  * Used at driver startup.
823  * Returns 0 on success or an -error on failure.
824  */
825 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
826 {
827 	int r;
828 
829 	if (adev->wb.wb_obj == NULL) {
830 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
831 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
832 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
833 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
834 					    (void **)&adev->wb.wb);
835 		if (r) {
836 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
837 			return r;
838 		}
839 
840 		adev->wb.num_wb = AMDGPU_MAX_WB;
841 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
842 
843 		/* clear wb memory */
844 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
845 	}
846 
847 	return 0;
848 }
849 
850 /**
851  * amdgpu_device_wb_get - Allocate a wb entry
852  *
853  * @adev: amdgpu_device pointer
854  * @wb: wb index
855  *
856  * Allocate a wb slot for use by the driver (all asics).
857  * Returns 0 on success or -EINVAL on failure.
858  */
859 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
860 {
861 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
862 
863 	if (offset < adev->wb.num_wb) {
864 		__set_bit(offset, adev->wb.used);
865 		*wb = offset << 3; /* convert to dw offset */
866 		return 0;
867 	} else {
868 		return -EINVAL;
869 	}
870 }
871 
872 /**
873  * amdgpu_device_wb_free - Free a wb entry
874  *
875  * @adev: amdgpu_device pointer
876  * @wb: wb index
877  *
878  * Free a wb slot allocated for use by the driver (all asics)
879  */
880 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
881 {
882 	wb >>= 3;
883 	if (wb < adev->wb.num_wb)
884 		__clear_bit(wb, adev->wb.used);
885 }
886 
887 /**
888  * amdgpu_device_resize_fb_bar - try to resize FB BAR
889  *
890  * @adev: amdgpu_device pointer
891  *
892  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
893  * to fail, but if any of the BARs is not accessible after the size we abort
894  * driver loading by returning -ENODEV.
895  */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	/* BAR size encoded as log2(MB); "| 1" avoids order_base_2(0) */
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	/* res is non-NULL after the loop only if a window above 4GB exists */
	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	/* -ENOSPC / -ENOTSUPP are non-fatal: we fall back to the old size */
	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	/* re-enable memory decoding with the original command word */
	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}
956 
957 /*
958  * GPU helpers function.
959  */
960 /**
961  * amdgpu_device_need_post - check if the hw need post or not
962  *
963  * @adev: amdgpu_device pointer
964  *
965  * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed because a hw reset was performed.
967  * Returns true if need or false if not.
968  */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	/* the hypervisor posts the ASIC for VFs */
	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			/* NOTE(review): adev->pm.fw is not released here —
			 * presumably the PM code reuses/frees it; confirm. */
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			/* SMC version lives at dword 69 of the firmware image */
			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* a pending hw reset always requires a re-post; consume the flag */
	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	/* a sane, nonzero memsize means the ASIC was already posted */
	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}
1013 
1014 /* if we get transitioned to only one device, take VGA back */
1015 /**
1016  * amdgpu_device_vga_set_decode - enable/disable vga decode
1017  *
1018  * @cookie: amdgpu_device pointer
1019  * @state: enable/disable vga decode
1020  *
1021  * Enable/disable vga decode (all asics).
1022  * Returns VGA resource flags.
1023  */
1024 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1025 {
1026 	struct amdgpu_device *adev = cookie;
1027 	amdgpu_asic_set_vga_state(adev, state);
1028 	if (state)
1029 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1030 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1031 	else
1032 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1033 }
1034 
1035 /**
1036  * amdgpu_device_check_block_size - validate the vm block size
1037  *
1038  * @adev: amdgpu_device pointer
1039  *
1040  * Validates the vm block size specified via module parameter.
1041  * The vm block size defines number of bits in page table versus page directory,
1042  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1043  * page table and the remaining bits are in the page directory.
1044  */
1045 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1046 {
1047 	/* defines number of bits in page table versus page directory,
1048 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1049 	 * page table and the remaining bits are in the page directory */
1050 	if (amdgpu_vm_block_size == -1)
1051 		return;
1052 
1053 	if (amdgpu_vm_block_size < 9) {
1054 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1055 			 amdgpu_vm_block_size);
1056 		amdgpu_vm_block_size = -1;
1057 	}
1058 }
1059 
1060 /**
1061  * amdgpu_device_check_vm_size - validate the vm size
1062  *
1063  * @adev: amdgpu_device pointer
1064  *
1065  * Validates the vm size in GB specified via module parameter.
1066  * The VM size is the size of the GPU virtual memory space in GB.
1067  */
1068 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1069 {
1070 	/* no need to check the default value */
1071 	if (amdgpu_vm_size == -1)
1072 		return;
1073 
1074 	if (amdgpu_vm_size < 1) {
1075 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1076 			 amdgpu_vm_size);
1077 		amdgpu_vm_size = -1;
1078 	}
1079 }
1080 
1081 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1082 {
1083 	struct sysinfo si;
1084 	bool is_os_64 = (sizeof(void *) == 8);
1085 	uint64_t total_memory;
1086 	uint64_t dram_size_seven_GB = 0x1B8000000;
1087 	uint64_t dram_size_three_GB = 0xB8000000;
1088 
1089 	if (amdgpu_smu_memory_pool_size == 0)
1090 		return;
1091 
1092 	if (!is_os_64) {
1093 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1094 		goto def_value;
1095 	}
1096 	si_meminfo(&si);
1097 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1098 
1099 	if ((amdgpu_smu_memory_pool_size == 1) ||
1100 		(amdgpu_smu_memory_pool_size == 2)) {
1101 		if (total_memory < dram_size_three_GB)
1102 			goto def_value1;
1103 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1104 		(amdgpu_smu_memory_pool_size == 8)) {
1105 		if (total_memory < dram_size_seven_GB)
1106 			goto def_value1;
1107 	} else {
1108 		DRM_WARN("Smu memory pool size not supported\n");
1109 		goto def_value;
1110 	}
1111 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1112 
1113 	return;
1114 
1115 def_value1:
1116 	DRM_WARN("No enough system memory\n");
1117 def_value:
1118 	adev->pm.smu_prv_buffer_size = 0;
1119 }
1120 
1121 /**
1122  * amdgpu_device_check_arguments - validate module params
1123  *
1124  * @adev: amdgpu_device pointer
1125  *
1126  * Validates certain module parameters and updates
1127  * the associated values used by the driver (all asics).
1128  */
1129 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1130 {
1131 	if (amdgpu_sched_jobs < 4) {
1132 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1133 			 amdgpu_sched_jobs);
1134 		amdgpu_sched_jobs = 4;
1135 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1136 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1137 			 amdgpu_sched_jobs);
1138 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1139 	}
1140 
1141 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1142 		/* gart size must be greater or equal to 32M */
1143 		dev_warn(adev->dev, "gart size (%d) too small\n",
1144 			 amdgpu_gart_size);
1145 		amdgpu_gart_size = -1;
1146 	}
1147 
1148 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1149 		/* gtt size must be greater or equal to 32M */
1150 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1151 				 amdgpu_gtt_size);
1152 		amdgpu_gtt_size = -1;
1153 	}
1154 
1155 	/* valid range is between 4 and 9 inclusive */
1156 	if (amdgpu_vm_fragment_size != -1 &&
1157 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1158 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1159 		amdgpu_vm_fragment_size = -1;
1160 	}
1161 
1162 	amdgpu_device_check_smu_prv_buffer_size(adev);
1163 
1164 	amdgpu_device_check_vm_size(adev);
1165 
1166 	amdgpu_device_check_block_size(adev);
1167 
1168 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1169 
1170 	amdgpu_gmc_tmz_set(adev);
1171 
1172 	return 0;
1173 }
1174 
1175 /**
1176  * amdgpu_switcheroo_set_state - set switcheroo state
1177  *
1178  * @pdev: pci dev pointer
1179  * @state: vga_switcheroo state
1180  *
1181  * Callback for the switcheroo driver.  Suspends or resumes the
1182  * the asics before or after it is powered up using ACPI methods.
1183  */
1184 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1185 {
1186 	struct drm_device *dev = pci_get_drvdata(pdev);
1187 	int r;
1188 
1189 	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1190 		return;
1191 
1192 	if (state == VGA_SWITCHEROO_ON) {
1193 		pr_info("switched on\n");
1194 		/* don't suspend or resume card normally */
1195 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1196 
1197 		pci_set_power_state(dev->pdev, PCI_D0);
1198 		pci_restore_state(dev->pdev);
1199 		r = pci_enable_device(dev->pdev);
1200 		if (r)
1201 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1202 		amdgpu_device_resume(dev, true);
1203 
1204 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1205 		drm_kms_helper_poll_enable(dev);
1206 	} else {
1207 		pr_info("switched off\n");
1208 		drm_kms_helper_poll_disable(dev);
1209 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1210 		amdgpu_device_suspend(dev, true);
1211 		pci_save_state(dev->pdev);
1212 		/* Shut down the device */
1213 		pci_disable_device(dev->pdev);
1214 		pci_set_power_state(dev->pdev, PCI_D3cold);
1215 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1216 	}
1217 }
1218 
1219 /**
1220  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1221  *
1222  * @pdev: pci dev pointer
1223  *
1224  * Callback for the switcheroo driver.  Check of the switcheroo
1225  * state can be changed.
1226  * Returns true if the state can be changed, false if not.
1227  */
1228 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1229 {
1230 	struct drm_device *dev = pci_get_drvdata(pdev);
1231 
1232 	/*
1233 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1234 	* locking inversion with the driver load path. And the access here is
1235 	* completely racy anyway. So don't bother with locking for now.
1236 	*/
1237 	return atomic_read(&dev->open_count) == 0;
1238 }
1239 
/* vga_switcheroo client callbacks; no reprobe hook is provided */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};
1245 
1246 /**
1247  * amdgpu_device_ip_set_clockgating_state - set the CG state
1248  *
1249  * @dev: amdgpu_device pointer
1250  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1251  * @state: clockgating state (gate or ungate)
1252  *
1253  * Sets the requested clockgating state for all instances of
1254  * the hardware IP specified.
1255  * Returns the error code from the last instance.
1256  */
1257 int amdgpu_device_ip_set_clockgating_state(void *dev,
1258 					   enum amd_ip_block_type block_type,
1259 					   enum amd_clockgating_state state)
1260 {
1261 	struct amdgpu_device *adev = dev;
1262 	int i, r = 0;
1263 
1264 	for (i = 0; i < adev->num_ip_blocks; i++) {
1265 		if (!adev->ip_blocks[i].status.valid)
1266 			continue;
1267 		if (adev->ip_blocks[i].version->type != block_type)
1268 			continue;
1269 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1270 			continue;
1271 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1272 			(void *)adev, state);
1273 		if (r)
1274 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1275 				  adev->ip_blocks[i].version->funcs->name, r);
1276 	}
1277 	return r;
1278 }
1279 
1280 /**
1281  * amdgpu_device_ip_set_powergating_state - set the PG state
1282  *
1283  * @dev: amdgpu_device pointer
1284  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1285  * @state: powergating state (gate or ungate)
1286  *
1287  * Sets the requested powergating state for all instances of
1288  * the hardware IP specified.
1289  * Returns the error code from the last instance.
1290  */
1291 int amdgpu_device_ip_set_powergating_state(void *dev,
1292 					   enum amd_ip_block_type block_type,
1293 					   enum amd_powergating_state state)
1294 {
1295 	struct amdgpu_device *adev = dev;
1296 	int i, r = 0;
1297 
1298 	for (i = 0; i < adev->num_ip_blocks; i++) {
1299 		if (!adev->ip_blocks[i].status.valid)
1300 			continue;
1301 		if (adev->ip_blocks[i].version->type != block_type)
1302 			continue;
1303 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1304 			continue;
1305 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1306 			(void *)adev, state);
1307 		if (r)
1308 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1309 				  adev->ip_blocks[i].version->funcs->name, r);
1310 	}
1311 	return r;
1312 }
1313 
1314 /**
1315  * amdgpu_device_ip_get_clockgating_state - get the CG state
1316  *
1317  * @adev: amdgpu_device pointer
1318  * @flags: clockgating feature flags
1319  *
1320  * Walks the list of IPs on the device and updates the clockgating
1321  * flags for each IP.
1322  * Updates @flags with the feature flags for each hardware IP where
1323  * clockgating is enabled.
1324  */
1325 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1326 					    u32 *flags)
1327 {
1328 	int i;
1329 
1330 	for (i = 0; i < adev->num_ip_blocks; i++) {
1331 		if (!adev->ip_blocks[i].status.valid)
1332 			continue;
1333 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1334 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1335 	}
1336 }
1337 
1338 /**
1339  * amdgpu_device_ip_wait_for_idle - wait for idle
1340  *
1341  * @adev: amdgpu_device pointer
1342  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1343  *
1344  * Waits for the request hardware IP to be idle.
1345  * Returns 0 for success or a negative error code on failure.
1346  */
1347 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1348 				   enum amd_ip_block_type block_type)
1349 {
1350 	int i, r;
1351 
1352 	for (i = 0; i < adev->num_ip_blocks; i++) {
1353 		if (!adev->ip_blocks[i].status.valid)
1354 			continue;
1355 		if (adev->ip_blocks[i].version->type == block_type) {
1356 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1357 			if (r)
1358 				return r;
1359 			break;
1360 		}
1361 	}
1362 	return 0;
1363 
1364 }
1365 
1366 /**
1367  * amdgpu_device_ip_is_idle - is the hardware IP idle
1368  *
1369  * @adev: amdgpu_device pointer
1370  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371  *
1372  * Check if the hardware IP is idle or not.
1373  * Returns true if it the IP is idle, false if not.
1374  */
1375 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1376 			      enum amd_ip_block_type block_type)
1377 {
1378 	int i;
1379 
1380 	for (i = 0; i < adev->num_ip_blocks; i++) {
1381 		if (!adev->ip_blocks[i].status.valid)
1382 			continue;
1383 		if (adev->ip_blocks[i].version->type == block_type)
1384 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1385 	}
1386 	return true;
1387 
1388 }
1389 
1390 /**
1391  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1392  *
1393  * @adev: amdgpu_device pointer
1394  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1395  *
1396  * Returns a pointer to the hardware IP block structure
1397  * if it exists for the asic, otherwise NULL.
1398  */
1399 struct amdgpu_ip_block *
1400 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1401 			      enum amd_ip_block_type type)
1402 {
1403 	int i;
1404 
1405 	for (i = 0; i < adev->num_ip_blocks; i++)
1406 		if (adev->ip_blocks[i].version->type == type)
1407 			return &adev->ip_blocks[i];
1408 
1409 	return NULL;
1410 }
1411 
1412 /**
1413  * amdgpu_device_ip_block_version_cmp
1414  *
1415  * @adev: amdgpu_device pointer
1416  * @type: enum amd_ip_block_type
1417  * @major: major version
1418  * @minor: minor version
1419  *
1420  * return 0 if equal or greater
1421  * return 1 if smaller or the ip_block doesn't exist
1422  */
1423 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1424 				       enum amd_ip_block_type type,
1425 				       u32 major, u32 minor)
1426 {
1427 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1428 
1429 	if (ip_block && ((ip_block->version->major > major) ||
1430 			((ip_block->version->major == major) &&
1431 			(ip_block->version->minor >= minor))))
1432 		return 0;
1433 
1434 	return 1;
1435 }
1436 
1437 /**
1438  * amdgpu_device_ip_block_add
1439  *
1440  * @adev: amdgpu_device pointer
1441  * @ip_block_version: pointer to the IP to add
1442  *
1443  * Adds the IP block driver information to the collection of IPs
1444  * on the asic.
1445  */
1446 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1447 			       const struct amdgpu_ip_block_version *ip_block_version)
1448 {
1449 	if (!ip_block_version)
1450 		return -EINVAL;
1451 
1452 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1453 		  ip_block_version->funcs->name);
1454 
1455 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1456 
1457 	return 0;
1458 }
1459 
1460 /**
1461  * amdgpu_device_enable_virtual_display - enable virtual display feature
1462  *
1463  * @adev: amdgpu_device pointer
1464  *
1465  * Enabled the virtual display feature if the user has enabled it via
1466  * the module parameter virtual_display.  This feature provides a virtual
1467  * display hardware on headless boards or in virtualized environments.
1468  * This function parses and validates the configuration string specified by
1469  * the user and configues the virtual display configuration (number of
1470  * virtual connectors, crtcs, etc.) specified.
1471  */
1472 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1473 {
1474 	adev->enable_virtual_display = false;
1475 
1476 	if (amdgpu_virtual_display) {
1477 		struct drm_device *ddev = adev->ddev;
1478 		const char *pci_address_name = pci_name(ddev->pdev);
1479 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1480 
1481 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1482 		pciaddstr_tmp = pciaddstr;
1483 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1484 			pciaddname = strsep(&pciaddname_tmp, ",");
1485 			if (!strcmp("all", pciaddname)
1486 			    || !strcmp(pci_address_name, pciaddname)) {
1487 				long num_crtc;
1488 				int res = -1;
1489 
1490 				adev->enable_virtual_display = true;
1491 
1492 				if (pciaddname_tmp)
1493 					res = kstrtol(pciaddname_tmp, 10,
1494 						      &num_crtc);
1495 
1496 				if (!res) {
1497 					if (num_crtc < 1)
1498 						num_crtc = 1;
1499 					if (num_crtc > 6)
1500 						num_crtc = 6;
1501 					adev->mode_info.num_crtc = num_crtc;
1502 				} else {
1503 					adev->mode_info.num_crtc = 1;
1504 				}
1505 				break;
1506 			}
1507 		}
1508 
1509 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1510 			 amdgpu_virtual_display, pci_address_name,
1511 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1512 
1513 		kfree(pciaddstr);
1514 	}
1515 }
1516 
1517 /**
1518  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1519  *
1520  * @adev: amdgpu_device pointer
1521  *
1522  * Parses the asic configuration parameters specified in the gpu info
1523  * firmware and makes them availale to the driver for use in configuring
1524  * the asic.
1525  * Returns 0 on success, -EINVAL on failure.
1526  */
1527 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1528 {
1529 	const char *chip_name;
1530 	char fw_name[30];
1531 	int err;
1532 	const struct gpu_info_firmware_header_v1_0 *hdr;
1533 
1534 	adev->firmware.gpu_info_fw = NULL;
1535 
1536 	switch (adev->asic_type) {
1537 	case CHIP_TOPAZ:
1538 	case CHIP_TONGA:
1539 	case CHIP_FIJI:
1540 	case CHIP_POLARIS10:
1541 	case CHIP_POLARIS11:
1542 	case CHIP_POLARIS12:
1543 	case CHIP_VEGAM:
1544 	case CHIP_CARRIZO:
1545 	case CHIP_STONEY:
1546 #ifdef CONFIG_DRM_AMDGPU_SI
1547 	case CHIP_VERDE:
1548 	case CHIP_TAHITI:
1549 	case CHIP_PITCAIRN:
1550 	case CHIP_OLAND:
1551 	case CHIP_HAINAN:
1552 #endif
1553 #ifdef CONFIG_DRM_AMDGPU_CIK
1554 	case CHIP_BONAIRE:
1555 	case CHIP_HAWAII:
1556 	case CHIP_KAVERI:
1557 	case CHIP_KABINI:
1558 	case CHIP_MULLINS:
1559 #endif
1560 	case CHIP_VEGA20:
1561 	default:
1562 		return 0;
1563 	case CHIP_VEGA10:
1564 		chip_name = "vega10";
1565 		break;
1566 	case CHIP_VEGA12:
1567 		chip_name = "vega12";
1568 		break;
1569 	case CHIP_RAVEN:
1570 		if (adev->rev_id >= 8)
1571 			chip_name = "raven2";
1572 		else if (adev->pdev->device == 0x15d8)
1573 			chip_name = "picasso";
1574 		else
1575 			chip_name = "raven";
1576 		break;
1577 	case CHIP_ARCTURUS:
1578 		chip_name = "arcturus";
1579 		break;
1580 	case CHIP_RENOIR:
1581 		chip_name = "renoir";
1582 		break;
1583 	case CHIP_NAVI10:
1584 		chip_name = "navi10";
1585 		break;
1586 	case CHIP_NAVI14:
1587 		chip_name = "navi14";
1588 		break;
1589 	case CHIP_NAVI12:
1590 		chip_name = "navi12";
1591 		break;
1592 	}
1593 
1594 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1595 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1596 	if (err) {
1597 		dev_err(adev->dev,
1598 			"Failed to load gpu_info firmware \"%s\"\n",
1599 			fw_name);
1600 		goto out;
1601 	}
1602 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1603 	if (err) {
1604 		dev_err(adev->dev,
1605 			"Failed to validate gpu_info firmware \"%s\"\n",
1606 			fw_name);
1607 		goto out;
1608 	}
1609 
1610 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1611 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1612 
1613 	switch (hdr->version_major) {
1614 	case 1:
1615 	{
1616 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1617 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1618 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1619 
1620 		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1621 			goto parse_soc_bounding_box;
1622 
1623 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1624 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1625 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1626 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1627 		adev->gfx.config.max_texture_channel_caches =
1628 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1629 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1630 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1631 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1632 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1633 		adev->gfx.config.double_offchip_lds_buf =
1634 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1635 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1636 		adev->gfx.cu_info.max_waves_per_simd =
1637 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1638 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1639 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1640 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1641 		if (hdr->version_minor >= 1) {
1642 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1643 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1644 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1645 			adev->gfx.config.num_sc_per_sh =
1646 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1647 			adev->gfx.config.num_packer_per_sc =
1648 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1649 		}
1650 
1651 parse_soc_bounding_box:
1652 		/*
1653 		 * soc bounding box info is not integrated in disocovery table,
1654 		 * we always need to parse it from gpu info firmware.
1655 		 */
1656 		if (hdr->version_minor == 2) {
1657 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1658 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1659 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1660 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1661 		}
1662 		break;
1663 	}
1664 	default:
1665 		dev_err(adev->dev,
1666 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1667 		err = -EINVAL;
1668 		goto out;
1669 	}
1670 out:
1671 	return err;
1672 }
1673 
1674 /**
1675  * amdgpu_device_ip_early_init - run early init for hardware IPs
1676  *
1677  * @adev: amdgpu_device pointer
1678  *
1679  * Early initialization pass for hardware IPs.  The hardware IPs that make
1680  * up each asic are discovered each IP's early_init callback is run.  This
1681  * is the first stage in initializing the asic.
1682  * Returns 0 on success, negative error code on failure.
1683  */
1684 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1685 {
1686 	int i, r;
1687 
1688 	amdgpu_device_enable_virtual_display(adev);
1689 
1690 	switch (adev->asic_type) {
1691 	case CHIP_TOPAZ:
1692 	case CHIP_TONGA:
1693 	case CHIP_FIJI:
1694 	case CHIP_POLARIS10:
1695 	case CHIP_POLARIS11:
1696 	case CHIP_POLARIS12:
1697 	case CHIP_VEGAM:
1698 	case CHIP_CARRIZO:
1699 	case CHIP_STONEY:
1700 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1701 			adev->family = AMDGPU_FAMILY_CZ;
1702 		else
1703 			adev->family = AMDGPU_FAMILY_VI;
1704 
1705 		r = vi_set_ip_blocks(adev);
1706 		if (r)
1707 			return r;
1708 		break;
1709 #ifdef CONFIG_DRM_AMDGPU_SI
1710 	case CHIP_VERDE:
1711 	case CHIP_TAHITI:
1712 	case CHIP_PITCAIRN:
1713 	case CHIP_OLAND:
1714 	case CHIP_HAINAN:
1715 		adev->family = AMDGPU_FAMILY_SI;
1716 		r = si_set_ip_blocks(adev);
1717 		if (r)
1718 			return r;
1719 		break;
1720 #endif
1721 #ifdef CONFIG_DRM_AMDGPU_CIK
1722 	case CHIP_BONAIRE:
1723 	case CHIP_HAWAII:
1724 	case CHIP_KAVERI:
1725 	case CHIP_KABINI:
1726 	case CHIP_MULLINS:
1727 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1728 			adev->family = AMDGPU_FAMILY_CI;
1729 		else
1730 			adev->family = AMDGPU_FAMILY_KV;
1731 
1732 		r = cik_set_ip_blocks(adev);
1733 		if (r)
1734 			return r;
1735 		break;
1736 #endif
1737 	case CHIP_VEGA10:
1738 	case CHIP_VEGA12:
1739 	case CHIP_VEGA20:
1740 	case CHIP_RAVEN:
1741 	case CHIP_ARCTURUS:
1742 	case CHIP_RENOIR:
1743 		if (adev->asic_type == CHIP_RAVEN ||
1744 		    adev->asic_type == CHIP_RENOIR)
1745 			adev->family = AMDGPU_FAMILY_RV;
1746 		else
1747 			adev->family = AMDGPU_FAMILY_AI;
1748 
1749 		r = soc15_set_ip_blocks(adev);
1750 		if (r)
1751 			return r;
1752 		break;
1753 	case  CHIP_NAVI10:
1754 	case  CHIP_NAVI14:
1755 	case  CHIP_NAVI12:
1756 		adev->family = AMDGPU_FAMILY_NV;
1757 
1758 		r = nv_set_ip_blocks(adev);
1759 		if (r)
1760 			return r;
1761 		break;
1762 	default:
1763 		/* FIXME: not supported yet */
1764 		return -EINVAL;
1765 	}
1766 
1767 	r = amdgpu_device_parse_gpu_info_fw(adev);
1768 	if (r)
1769 		return r;
1770 
1771 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1772 		amdgpu_discovery_get_gfx_info(adev);
1773 
1774 	amdgpu_amdkfd_device_probe(adev);
1775 
1776 	if (amdgpu_sriov_vf(adev)) {
1777 		/* handle vbios stuff prior full access mode for new handshake */
1778 		if (adev->virt.req_init_data_ver == 1) {
1779 			if (!amdgpu_get_bios(adev)) {
1780 				DRM_ERROR("failed to get vbios\n");
1781 				return -EINVAL;
1782 			}
1783 
1784 			r = amdgpu_atombios_init(adev);
1785 			if (r) {
1786 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1787 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1788 				return r;
1789 			}
1790 		}
1791 	}
1792 
1793 	/* we need to send REQ_GPU here for legacy handshaker otherwise the vbios
1794 	 * will not be prepared by host for this VF */
1795 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
1796 		r = amdgpu_virt_request_full_gpu(adev, true);
1797 		if (r)
1798 			return r;
1799 	}
1800 
1801 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1802 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1803 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1804 
1805 	for (i = 0; i < adev->num_ip_blocks; i++) {
1806 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1807 			DRM_ERROR("disabled ip block: %d <%s>\n",
1808 				  i, adev->ip_blocks[i].version->funcs->name);
1809 			adev->ip_blocks[i].status.valid = false;
1810 		} else {
1811 			if (adev->ip_blocks[i].version->funcs->early_init) {
1812 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1813 				if (r == -ENOENT) {
1814 					adev->ip_blocks[i].status.valid = false;
1815 				} else if (r) {
1816 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1817 						  adev->ip_blocks[i].version->funcs->name, r);
1818 					return r;
1819 				} else {
1820 					adev->ip_blocks[i].status.valid = true;
1821 				}
1822 			} else {
1823 				adev->ip_blocks[i].status.valid = true;
1824 			}
1825 		}
1826 		/* get the vbios after the asic_funcs are set up */
1827 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1828 			/* skip vbios handling for new handshake */
1829 			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1830 				continue;
1831 
1832 			/* Read BIOS */
1833 			if (!amdgpu_get_bios(adev))
1834 				return -EINVAL;
1835 
1836 			r = amdgpu_atombios_init(adev);
1837 			if (r) {
1838 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1839 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1840 				return r;
1841 			}
1842 		}
1843 	}
1844 
1845 	adev->cg_flags &= amdgpu_cg_mask;
1846 	adev->pg_flags &= amdgpu_pg_mask;
1847 
1848 	return 0;
1849 }
1850 
1851 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1852 {
1853 	int i, r;
1854 
1855 	for (i = 0; i < adev->num_ip_blocks; i++) {
1856 		if (!adev->ip_blocks[i].status.sw)
1857 			continue;
1858 		if (adev->ip_blocks[i].status.hw)
1859 			continue;
1860 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1861 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1862 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1863 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1864 			if (r) {
1865 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1866 					  adev->ip_blocks[i].version->funcs->name, r);
1867 				return r;
1868 			}
1869 			adev->ip_blocks[i].status.hw = true;
1870 		}
1871 	}
1872 
1873 	return 0;
1874 }
1875 
1876 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1877 {
1878 	int i, r;
1879 
1880 	for (i = 0; i < adev->num_ip_blocks; i++) {
1881 		if (!adev->ip_blocks[i].status.sw)
1882 			continue;
1883 		if (adev->ip_blocks[i].status.hw)
1884 			continue;
1885 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1886 		if (r) {
1887 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1888 				  adev->ip_blocks[i].version->funcs->name, r);
1889 			return r;
1890 		}
1891 		adev->ip_blocks[i].status.hw = true;
1892 	}
1893 
1894 	return 0;
1895 }
1896 
1897 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1898 {
1899 	int r = 0;
1900 	int i;
1901 	uint32_t smu_version;
1902 
1903 	if (adev->asic_type >= CHIP_VEGA10) {
1904 		for (i = 0; i < adev->num_ip_blocks; i++) {
1905 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1906 				continue;
1907 
1908 			/* no need to do the fw loading again if already done*/
1909 			if (adev->ip_blocks[i].status.hw == true)
1910 				break;
1911 
1912 			if (adev->in_gpu_reset || adev->in_suspend) {
1913 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1914 				if (r) {
1915 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1916 							  adev->ip_blocks[i].version->funcs->name, r);
1917 					return r;
1918 				}
1919 			} else {
1920 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1921 				if (r) {
1922 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1923 							  adev->ip_blocks[i].version->funcs->name, r);
1924 					return r;
1925 				}
1926 			}
1927 
1928 			adev->ip_blocks[i].status.hw = true;
1929 			break;
1930 		}
1931 	}
1932 
1933 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1934 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1935 
1936 	return r;
1937 }
1938 
1939 /**
1940  * amdgpu_device_ip_init - run init for hardware IPs
1941  *
1942  * @adev: amdgpu_device pointer
1943  *
1944  * Main initialization pass for hardware IPs.  The list of all the hardware
1945  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1946  * are run.  sw_init initializes the software state associated with each IP
1947  * and hw_init initializes the hardware associated with each IP.
1948  * Returns 0 on success, negative error code on failure.
1949  */
1950 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1951 {
1952 	int i, r;
1953 
1954 	r = amdgpu_ras_init(adev);
1955 	if (r)
1956 		return r;
1957 
1958 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1959 		r = amdgpu_virt_request_full_gpu(adev, true);
1960 		if (r)
1961 			return -EAGAIN;
1962 	}
1963 
1964 	for (i = 0; i < adev->num_ip_blocks; i++) {
1965 		if (!adev->ip_blocks[i].status.valid)
1966 			continue;
1967 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1968 		if (r) {
1969 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1970 				  adev->ip_blocks[i].version->funcs->name, r);
1971 			goto init_failed;
1972 		}
1973 		adev->ip_blocks[i].status.sw = true;
1974 
1975 		/* need to do gmc hw init early so we can allocate gpu mem */
1976 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1977 			r = amdgpu_device_vram_scratch_init(adev);
1978 			if (r) {
1979 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1980 				goto init_failed;
1981 			}
1982 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1983 			if (r) {
1984 				DRM_ERROR("hw_init %d failed %d\n", i, r);
1985 				goto init_failed;
1986 			}
1987 			r = amdgpu_device_wb_init(adev);
1988 			if (r) {
1989 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1990 				goto init_failed;
1991 			}
1992 			adev->ip_blocks[i].status.hw = true;
1993 
1994 			/* right after GMC hw init, we create CSA */
1995 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1996 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1997 								AMDGPU_GEM_DOMAIN_VRAM,
1998 								AMDGPU_CSA_SIZE);
1999 				if (r) {
2000 					DRM_ERROR("allocate CSA failed %d\n", r);
2001 					goto init_failed;
2002 				}
2003 			}
2004 		}
2005 	}
2006 
2007 	if (amdgpu_sriov_vf(adev))
2008 		amdgpu_virt_init_data_exchange(adev);
2009 
2010 	r = amdgpu_ib_pool_init(adev);
2011 	if (r) {
2012 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2013 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2014 		goto init_failed;
2015 	}
2016 
2017 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2018 	if (r)
2019 		goto init_failed;
2020 
2021 	r = amdgpu_device_ip_hw_init_phase1(adev);
2022 	if (r)
2023 		goto init_failed;
2024 
2025 	r = amdgpu_device_fw_loading(adev);
2026 	if (r)
2027 		goto init_failed;
2028 
2029 	r = amdgpu_device_ip_hw_init_phase2(adev);
2030 	if (r)
2031 		goto init_failed;
2032 
2033 	/*
2034 	 * retired pages will be loaded from eeprom and reserved here,
2035 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2036 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2037 	 * for I2C communication which only true at this point.
2038 	 * recovery_init may fail, but it can free all resources allocated by
2039 	 * itself and its failure should not stop amdgpu init process.
2040 	 *
2041 	 * Note: theoretically, this should be called before all vram allocations
2042 	 * to protect retired page from abusing
2043 	 */
2044 	amdgpu_ras_recovery_init(adev);
2045 
2046 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2047 		amdgpu_xgmi_add_device(adev);
2048 	amdgpu_amdkfd_device_init(adev);
2049 
2050 	amdgpu_fru_get_product_info(adev);
2051 
2052 init_failed:
2053 	if (amdgpu_sriov_vf(adev))
2054 		amdgpu_virt_release_full_gpu(adev, true);
2055 
2056 	return r;
2057 }
2058 
2059 /**
2060  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2061  *
2062  * @adev: amdgpu_device pointer
2063  *
2064  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2065  * this function before a GPU reset.  If the value is retained after a
2066  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2067  */
2068 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2069 {
2070 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2071 }
2072 
2073 /**
2074  * amdgpu_device_check_vram_lost - check if vram is valid
2075  *
2076  * @adev: amdgpu_device pointer
2077  *
2078  * Checks the reset magic value written to the gart pointer in VRAM.
2079  * The driver calls this after a GPU reset to see if the contents of
2080  * VRAM is lost or now.
2081  * returns true if vram is lost, false if not.
2082  */
2083 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2084 {
2085 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2086 			AMDGPU_RESET_MAGIC_NUM))
2087 		return true;
2088 
2089 	if (!adev->in_gpu_reset)
2090 		return false;
2091 
2092 	/*
2093 	 * For all ASICs with baco/mode1 reset, the VRAM is
2094 	 * always assumed to be lost.
2095 	 */
2096 	switch (amdgpu_asic_reset_method(adev)) {
2097 	case AMD_RESET_METHOD_BACO:
2098 	case AMD_RESET_METHOD_MODE1:
2099 		return true;
2100 	default:
2101 		return false;
2102 	}
2103 }
2104 
2105 /**
2106  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2107  *
2108  * @adev: amdgpu_device pointer
2109  * @state: clockgating state (gate or ungate)
2110  *
2111  * The list of all the hardware IPs that make up the asic is walked and the
2112  * set_clockgating_state callbacks are run.
2113  * Late initialization pass enabling clockgating for hardware IPs.
2114  * Fini or suspend, pass disabling clockgating for hardware IPs.
2115  * Returns 0 on success, negative error code on failure.
2116  */
2117 
2118 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2119 						enum amd_clockgating_state state)
2120 {
2121 	int i, j, r;
2122 
2123 	if (amdgpu_emu_mode == 1)
2124 		return 0;
2125 
2126 	for (j = 0; j < adev->num_ip_blocks; j++) {
2127 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2128 		if (!adev->ip_blocks[i].status.late_initialized)
2129 			continue;
2130 		/* skip CG for VCE/UVD, it's handled specially */
2131 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2132 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2133 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2134 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2135 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2136 			/* enable clockgating to save power */
2137 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2138 										     state);
2139 			if (r) {
2140 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2141 					  adev->ip_blocks[i].version->funcs->name, r);
2142 				return r;
2143 			}
2144 		}
2145 	}
2146 
2147 	return 0;
2148 }
2149 
/*
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * Walks all hardware IPs and runs the set_powergating_state callbacks:
 * gating in IP-block order on late init, ungating in reverse order on
 * fini/suspend.  Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	/* Emulation mode has no real powergating hardware to program. */
	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		/* Gate in IP-block order; ungate in reverse order. */
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
											state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}
2179 
/*
 * amdgpu_device_enable_mgpu_fan_boost - enable fan boost on multi-dGPU systems
 *
 * Walks the global mgpu_info list (under its mutex) and enables the
 * MGPU fan boost feature on every discrete GPU that supports it and
 * doesn't have it enabled yet.  Returns 0 on success (including when
 * fewer than two dGPUs are present), or the first enable failure.
 */
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		/* APUs are skipped; dGPU must expose the callback and not
		 * have boost enabled already. */
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled &&
		    adev->powerplay.pp_funcs &&
		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}
2216 
2217 /**
2218  * amdgpu_device_ip_late_init - run late init for hardware IPs
2219  *
2220  * @adev: amdgpu_device pointer
2221  *
2222  * Late initialization pass for hardware IPs.  The list of all the hardware
2223  * IPs that make up the asic is walked and the late_init callbacks are run.
2224  * late_init covers any special initialization that an IP requires
2225  * after all of the have been initialized or something that needs to happen
2226  * late in the init process.
2227  * Returns 0 on success, negative error code on failure.
2228  */
2229 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2230 {
2231 	struct amdgpu_gpu_instance *gpu_instance;
2232 	int i = 0, r;
2233 
2234 	for (i = 0; i < adev->num_ip_blocks; i++) {
2235 		if (!adev->ip_blocks[i].status.hw)
2236 			continue;
2237 		if (adev->ip_blocks[i].version->funcs->late_init) {
2238 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2239 			if (r) {
2240 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2241 					  adev->ip_blocks[i].version->funcs->name, r);
2242 				return r;
2243 			}
2244 		}
2245 		adev->ip_blocks[i].status.late_initialized = true;
2246 	}
2247 
2248 	amdgpu_ras_set_error_query_ready(adev, true);
2249 
2250 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2251 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2252 
2253 	amdgpu_device_fill_reset_magic(adev);
2254 
2255 	r = amdgpu_device_enable_mgpu_fan_boost();
2256 	if (r)
2257 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2258 
2259 
2260 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2261 		mutex_lock(&mgpu_info.mutex);
2262 
2263 		/*
2264 		 * Reset device p-state to low as this was booted with high.
2265 		 *
2266 		 * This should be performed only after all devices from the same
2267 		 * hive get initialized.
2268 		 *
2269 		 * However, it's unknown how many device in the hive in advance.
2270 		 * As this is counted one by one during devices initializations.
2271 		 *
2272 		 * So, we wait for all XGMI interlinked devices initialized.
2273 		 * This may bring some delays as those devices may come from
2274 		 * different hives. But that should be OK.
2275 		 */
2276 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2277 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2278 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2279 				if (gpu_instance->adev->flags & AMD_IS_APU)
2280 					continue;
2281 
2282 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2283 						AMDGPU_XGMI_PSTATE_MIN);
2284 				if (r) {
2285 					DRM_ERROR("pstate setting failed (%d).\n", r);
2286 					break;
2287 				}
2288 			}
2289 		}
2290 
2291 		mutex_unlock(&mgpu_info.mutex);
2292 	}
2293 
2294 	return 0;
2295 }
2296 
2297 /**
2298  * amdgpu_device_ip_fini - run fini for hardware IPs
2299  *
2300  * @adev: amdgpu_device pointer
2301  *
2302  * Main teardown pass for hardware IPs.  The list of all the hardware
2303  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2304  * are run.  hw_fini tears down the hardware associated with each IP
2305  * and sw_fini tears down any software state associated with each IP.
2306  * Returns 0 on success, negative error code on failure.
2307  */
2308 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2309 {
2310 	int i, r;
2311 
2312 	amdgpu_ras_pre_fini(adev);
2313 
2314 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2315 		amdgpu_xgmi_remove_device(adev);
2316 
2317 	amdgpu_amdkfd_device_fini(adev);
2318 
2319 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2320 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2321 
2322 	/* need to disable SMC first */
2323 	for (i = 0; i < adev->num_ip_blocks; i++) {
2324 		if (!adev->ip_blocks[i].status.hw)
2325 			continue;
2326 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2327 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2328 			/* XXX handle errors */
2329 			if (r) {
2330 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2331 					  adev->ip_blocks[i].version->funcs->name, r);
2332 			}
2333 			adev->ip_blocks[i].status.hw = false;
2334 			break;
2335 		}
2336 	}
2337 
2338 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2339 		if (!adev->ip_blocks[i].status.hw)
2340 			continue;
2341 
2342 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2343 		/* XXX handle errors */
2344 		if (r) {
2345 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2346 				  adev->ip_blocks[i].version->funcs->name, r);
2347 		}
2348 
2349 		adev->ip_blocks[i].status.hw = false;
2350 	}
2351 
2352 
2353 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2354 		if (!adev->ip_blocks[i].status.sw)
2355 			continue;
2356 
2357 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2358 			amdgpu_ucode_free_bo(adev);
2359 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2360 			amdgpu_device_wb_fini(adev);
2361 			amdgpu_device_vram_scratch_fini(adev);
2362 			amdgpu_ib_pool_fini(adev);
2363 		}
2364 
2365 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2366 		/* XXX handle errors */
2367 		if (r) {
2368 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2369 				  adev->ip_blocks[i].version->funcs->name, r);
2370 		}
2371 		adev->ip_blocks[i].status.sw = false;
2372 		adev->ip_blocks[i].status.valid = false;
2373 	}
2374 
2375 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2376 		if (!adev->ip_blocks[i].status.late_initialized)
2377 			continue;
2378 		if (adev->ip_blocks[i].version->funcs->late_fini)
2379 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2380 		adev->ip_blocks[i].status.late_initialized = false;
2381 	}
2382 
2383 	amdgpu_ras_fini(adev);
2384 
2385 	if (amdgpu_sriov_vf(adev))
2386 		if (amdgpu_virt_release_full_gpu(adev, false))
2387 			DRM_ERROR("failed to release exclusive mode on fini\n");
2388 
2389 	return 0;
2390 }
2391 
2392 /**
2393  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2394  *
2395  * @work: work_struct.
2396  */
2397 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2398 {
2399 	struct amdgpu_device *adev =
2400 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2401 	int r;
2402 
2403 	r = amdgpu_ib_ring_tests(adev);
2404 	if (r)
2405 		DRM_ERROR("ib ring test failed (%d).\n", r);
2406 }
2407 
/*
 * amdgpu_device_delay_enable_gfx_off - delayed-work handler turning GFXOFF on
 *
 * Asks the SMU to powergate the GFX block once no one holds a GFXOFF
 * disable request (gfx_off_req_count == 0) and it isn't already off.
 * gfx_off_mutex protects both the state flag and the request count.
 */
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		/* set_powergating_by_smu returns 0 on success */
		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
			adev->gfx.gfx_off_state = true;
	}
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}
2420 
2421 /**
2422  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2423  *
2424  * @adev: amdgpu_device pointer
2425  *
2426  * Main suspend function for hardware IPs.  The list of all the hardware
2427  * IPs that make up the asic is walked, clockgating is disabled and the
2428  * suspend callbacks are run.  suspend puts the hardware and software state
2429  * in each IP into a state suitable for suspend.
2430  * Returns 0 on success, negative error code on failure.
2431  */
2432 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2433 {
2434 	int i, r;
2435 
2436 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2437 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2438 
2439 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2440 		if (!adev->ip_blocks[i].status.valid)
2441 			continue;
2442 		/* displays are handled separately */
2443 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2444 			/* XXX handle errors */
2445 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2446 			/* XXX handle errors */
2447 			if (r) {
2448 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2449 					  adev->ip_blocks[i].version->funcs->name, r);
2450 				return r;
2451 			}
2452 			adev->ip_blocks[i].status.hw = false;
2453 		}
2454 	}
2455 
2456 	return 0;
2457 }
2458 
2459 /**
2460  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2461  *
2462  * @adev: amdgpu_device pointer
2463  *
2464  * Main suspend function for hardware IPs.  The list of all the hardware
2465  * IPs that make up the asic is walked, clockgating is disabled and the
2466  * suspend callbacks are run.  suspend puts the hardware and software state
2467  * in each IP into a state suitable for suspend.
2468  * Returns 0 on success, negative error code on failure.
2469  */
2470 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2471 {
2472 	int i, r;
2473 
2474 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2475 		if (!adev->ip_blocks[i].status.valid)
2476 			continue;
2477 		/* displays are handled in phase1 */
2478 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2479 			continue;
2480 		/* PSP lost connection when err_event_athub occurs */
2481 		if (amdgpu_ras_intr_triggered() &&
2482 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2483 			adev->ip_blocks[i].status.hw = false;
2484 			continue;
2485 		}
2486 		/* XXX handle errors */
2487 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2488 		/* XXX handle errors */
2489 		if (r) {
2490 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2491 				  adev->ip_blocks[i].version->funcs->name, r);
2492 		}
2493 		adev->ip_blocks[i].status.hw = false;
2494 		/* handle putting the SMC in the appropriate state */
2495 		if(!amdgpu_sriov_vf(adev)){
2496 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2497 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2498 				if (r) {
2499 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2500 							adev->mp1_state, r);
2501 					return r;
2502 				}
2503 			}
2504 		}
2505 		adev->ip_blocks[i].status.hw = false;
2506 	}
2507 
2508 	return 0;
2509 }
2510 
2511 /**
2512  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2513  *
2514  * @adev: amdgpu_device pointer
2515  *
2516  * Main suspend function for hardware IPs.  The list of all the hardware
2517  * IPs that make up the asic is walked, clockgating is disabled and the
2518  * suspend callbacks are run.  suspend puts the hardware and software state
2519  * in each IP into a state suitable for suspend.
2520  * Returns 0 on success, negative error code on failure.
2521  */
2522 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2523 {
2524 	int r;
2525 
2526 	if (amdgpu_sriov_vf(adev))
2527 		amdgpu_virt_request_full_gpu(adev, false);
2528 
2529 	r = amdgpu_device_ip_suspend_phase1(adev);
2530 	if (r)
2531 		return r;
2532 	r = amdgpu_device_ip_suspend_phase2(adev);
2533 
2534 	if (amdgpu_sriov_vf(adev))
2535 		amdgpu_virt_release_full_gpu(adev, false);
2536 
2537 	return r;
2538 }
2539 
/*
 * amdgpu_device_ip_reinit_early_sriov - early hw re-init after a VF reset
 *
 * Re-runs hw_init for the first-stage IP blocks in the fixed order given
 * by ip_order[] (GMC first so memory access works for the rest).  Every
 * block's hw status is cleared up front so later stages re-init the rest.
 * Returns 0 on success, first hw_init error otherwise.
 */
static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_GMC,
		AMD_IP_BLOCK_TYPE_COMMON,
		AMD_IP_BLOCK_TYPE_PSP,
		AMD_IP_BLOCK_TYPE_IH,
	};

	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
		int j;
		struct amdgpu_ip_block *block;

		for (j = 0; j < adev->num_ip_blocks; j++) {
			block = &adev->ip_blocks[j];

			/* mark everything not-yet-hw-initialized; the late
			 * pass only touches blocks still flagged !hw */
			block->status.hw = false;
			if (block->version->type != ip_order[i] ||
				!block->status.valid)
				continue;

			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}
2573 
/*
 * amdgpu_device_ip_reinit_late_sriov - late hw re-init after a VF reset
 *
 * Re-initializes the remaining IP blocks (those not covered by the early
 * pass) in the fixed order given by ip_order[].  The SMC block uses its
 * resume callback instead of hw_init; all others use hw_init.  Blocks
 * already flagged hw (done by the early pass) are skipped.
 * Returns 0 on success, first callback error otherwise.
 */
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_SMC,
		AMD_IP_BLOCK_TYPE_DCE,
		AMD_IP_BLOCK_TYPE_GFX,
		AMD_IP_BLOCK_TYPE_SDMA,
		AMD_IP_BLOCK_TYPE_UVD,
		AMD_IP_BLOCK_TYPE_VCE,
		AMD_IP_BLOCK_TYPE_VCN
	};

	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
		int j;
		struct amdgpu_ip_block *block;

		for (j = 0; j < adev->num_ip_blocks; j++) {
			block = &adev->ip_blocks[j];

			if (block->version->type != ip_order[i] ||
				!block->status.valid ||
				block->status.hw)
				continue;

			/* SMC state survives in firmware; resume it rather
			 * than a full hw_init */
			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
				r = block->version->funcs->resume(adev);
			else
				r = block->version->funcs->hw_init(adev);

			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}
2614 
2615 /**
2616  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2617  *
2618  * @adev: amdgpu_device pointer
2619  *
2620  * First resume function for hardware IPs.  The list of all the hardware
2621  * IPs that make up the asic is walked and the resume callbacks are run for
2622  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2623  * after a suspend and updates the software state as necessary.  This
2624  * function is also used for restoring the GPU after a GPU reset.
2625  * Returns 0 on success, negative error code on failure.
2626  */
2627 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2628 {
2629 	int i, r;
2630 
2631 	for (i = 0; i < adev->num_ip_blocks; i++) {
2632 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2633 			continue;
2634 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2635 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2636 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2637 
2638 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2639 			if (r) {
2640 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2641 					  adev->ip_blocks[i].version->funcs->name, r);
2642 				return r;
2643 			}
2644 			adev->ip_blocks[i].status.hw = true;
2645 		}
2646 	}
2647 
2648 	return 0;
2649 }
2650 
2651 /**
2652  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2653  *
2654  * @adev: amdgpu_device pointer
2655  *
2656  * First resume function for hardware IPs.  The list of all the hardware
2657  * IPs that make up the asic is walked and the resume callbacks are run for
2658  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2659  * functional state after a suspend and updates the software state as
2660  * necessary.  This function is also used for restoring the GPU after a GPU
2661  * reset.
2662  * Returns 0 on success, negative error code on failure.
2663  */
2664 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2665 {
2666 	int i, r;
2667 
2668 	for (i = 0; i < adev->num_ip_blocks; i++) {
2669 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2670 			continue;
2671 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2672 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2673 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2674 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2675 			continue;
2676 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2677 		if (r) {
2678 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2679 				  adev->ip_blocks[i].version->funcs->name, r);
2680 			return r;
2681 		}
2682 		adev->ip_blocks[i].status.hw = true;
2683 	}
2684 
2685 	return 0;
2686 }
2687 
2688 /**
2689  * amdgpu_device_ip_resume - run resume for hardware IPs
2690  *
2691  * @adev: amdgpu_device pointer
2692  *
2693  * Main resume function for hardware IPs.  The hardware IPs
2694  * are split into two resume functions because they are
2695  * are also used in in recovering from a GPU reset and some additional
2696  * steps need to be take between them.  In this case (S3/S4) they are
2697  * run sequentially.
2698  * Returns 0 on success, negative error code on failure.
2699  */
2700 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2701 {
2702 	int r;
2703 
2704 	r = amdgpu_device_ip_resume_phase1(adev);
2705 	if (r)
2706 		return r;
2707 
2708 	r = amdgpu_device_fw_loading(adev);
2709 	if (r)
2710 		return r;
2711 
2712 	r = amdgpu_device_ip_resume_phase2(adev);
2713 
2714 	return r;
2715 }
2716 
2717 /**
2718  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2719  *
2720  * @adev: amdgpu_device pointer
2721  *
2722  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2723  */
2724 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2725 {
2726 	if (amdgpu_sriov_vf(adev)) {
2727 		if (adev->is_atom_fw) {
2728 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2729 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2730 		} else {
2731 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2732 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2733 		}
2734 
2735 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2736 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2737 	}
2738 }
2739 
2740 /**
2741  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2742  *
2743  * @asic_type: AMD asic type
2744  *
2745  * Check if there is DC (new modesetting infrastructre) support for an asic.
2746  * returns true if DC has support, false if not.
2747  */
2748 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2749 {
2750 	switch (asic_type) {
2751 #if defined(CONFIG_DRM_AMD_DC)
2752 	case CHIP_BONAIRE:
2753 	case CHIP_KAVERI:
2754 	case CHIP_KABINI:
2755 	case CHIP_MULLINS:
2756 		/*
2757 		 * We have systems in the wild with these ASICs that require
2758 		 * LVDS and VGA support which is not supported with DC.
2759 		 *
2760 		 * Fallback to the non-DC driver here by default so as not to
2761 		 * cause regressions.
2762 		 */
2763 		return amdgpu_dc > 0;
2764 	case CHIP_HAWAII:
2765 	case CHIP_CARRIZO:
2766 	case CHIP_STONEY:
2767 	case CHIP_POLARIS10:
2768 	case CHIP_POLARIS11:
2769 	case CHIP_POLARIS12:
2770 	case CHIP_VEGAM:
2771 	case CHIP_TONGA:
2772 	case CHIP_FIJI:
2773 	case CHIP_VEGA10:
2774 	case CHIP_VEGA12:
2775 	case CHIP_VEGA20:
2776 #if defined(CONFIG_DRM_AMD_DC_DCN)
2777 	case CHIP_RAVEN:
2778 	case CHIP_NAVI10:
2779 	case CHIP_NAVI14:
2780 	case CHIP_NAVI12:
2781 	case CHIP_RENOIR:
2782 #endif
2783 		return amdgpu_dc != 0;
2784 #endif
2785 	default:
2786 		if (amdgpu_dc > 0)
2787 			DRM_INFO("Display Core has been requested via kernel parameter "
2788 					 "but isn't supported by ASIC, ignoring\n");
2789 		return false;
2790 	}
2791 }
2792 
2793 /**
2794  * amdgpu_device_has_dc_support - check if dc is supported
2795  *
2796  * @adev: amdgpu_device_pointer
2797  *
2798  * Returns true for supported, false for not supported
2799  */
2800 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2801 {
2802 	if (amdgpu_sriov_vf(adev))
2803 		return false;
2804 
2805 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2806 }
2807 
2808 
/*
 * amdgpu_device_xgmi_reset_func - per-device work for a hive-wide XGMI reset
 *
 * Resets one device of an XGMI hive, using the hive's task barrier so all
 * devices enter/exit BACO in lockstep.  The outcome is recorded in
 * adev->asic_reset_res rather than returned.
 */
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
	struct amdgpu_device *adev =
		container_of(__work, struct amdgpu_device, xgmi_reset_work);
	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);

	/* It's a bug to not have a hive within this function */
	if (WARN_ON(!hive))
		return;

	/*
	 * Use task barrier to synchronize all xgmi reset works across the
	 * hive. task_barrier_enter and task_barrier_exit will block
	 * until all the threads running the xgmi reset works reach
	 * those points. task_barrier_full will do both blocks.
	 */
	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {

		/* all devices enter BACO together ... */
		task_barrier_enter(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);

		if (adev->asic_reset_res)
			goto fail;

		/* ... and leave BACO together */
		task_barrier_exit(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);

		if (adev->asic_reset_res)
			goto fail;

		if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
			adev->mmhub.funcs->reset_ras_error_count(adev);
	} else {

		/* non-BACO methods: synchronize once, then reset individually */
		task_barrier_full(&hive->tb);
		adev->asic_reset_res =  amdgpu_asic_reset(adev);
	}

fail:
	if (adev->asic_reset_res)
		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
			 adev->asic_reset_res, adev->ddev->unique);
}
2852 
/*
 * amdgpu_device_get_job_timeout_settings - parse the lockup_timeout parameter
 *
 * Parses the comma-separated amdgpu_lockup_timeout module parameter into
 * per-engine job timeouts (order: gfx, compute, sdma, video), expressed in
 * jiffies.  A value of 0 keeps the default for that slot; a negative value
 * means no timeout (MAX_SCHEDULE_TIMEOUT).  A single value applies to all
 * non-compute engines.  Returns 0 on success or a kstrtol parse error.
 */
static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
	char *input = amdgpu_lockup_timeout;
	char *timeout_setting = NULL;
	int index = 0;
	long timeout;
	int ret = 0;

	/*
	 * By default timeout for non compute jobs is 10000.
	 * And there is no timeout enforced on compute jobs.
	 * In SR-IOV or passthrough mode, timeout for compute
	 * jobs are 60000 by default.
	 */
	adev->gfx_timeout = msecs_to_jiffies(10000);
	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
		adev->compute_timeout =  msecs_to_jiffies(60000);
	else
		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;

	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
		while ((timeout_setting = strsep(&input, ",")) &&
				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
			ret = kstrtol(timeout_setting, 0, &timeout);
			if (ret)
				return ret;

			if (timeout == 0) {
				/* 0 keeps this slot's default */
				index++;
				continue;
			} else if (timeout < 0) {
				timeout = MAX_SCHEDULE_TIMEOUT;
			} else {
				timeout = msecs_to_jiffies(timeout);
			}

			/* slot order: gfx, compute, sdma, video; extras ignored */
			switch (index++) {
			case 0:
				adev->gfx_timeout = timeout;
				break;
			case 1:
				adev->compute_timeout = timeout;
				break;
			case 2:
				adev->sdma_timeout = timeout;
				break;
			case 3:
				adev->video_timeout = timeout;
				break;
			default:
				break;
			}
		}
		/*
		 * There is only one value specified and
		 * it should apply to all non-compute jobs.
		 */
		if (index == 1) {
			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
				adev->compute_timeout = adev->gfx_timeout;
		}
	}

	return ret;
}
2920 
/* sysfs attributes exposed per device; registered during device init */
static const struct attribute *amdgpu_dev_attributes[] = {
	&dev_attr_product_name.attr,
	&dev_attr_product_number.attr,
	&dev_attr_serial_number.attr,
	&dev_attr_pcie_replay_count.attr,
	NULL
};
2928 
2929 /**
2930  * amdgpu_device_init - initialize the driver
2931  *
2932  * @adev: amdgpu_device pointer
2933  * @ddev: drm dev pointer
2934  * @pdev: pci dev pointer
2935  * @flags: driver flags
2936  *
2937  * Initializes the driver info and hw (all asics).
2938  * Returns 0 for success or an error on failure.
2939  * Called at driver startup.
2940  */
2941 int amdgpu_device_init(struct amdgpu_device *adev,
2942 		       struct drm_device *ddev,
2943 		       struct pci_dev *pdev,
2944 		       uint32_t flags)
2945 {
2946 	int r, i;
2947 	bool boco = false;
2948 	u32 max_MBps;
2949 
2950 	adev->shutdown = false;
2951 	adev->dev = &pdev->dev;
2952 	adev->ddev = ddev;
2953 	adev->pdev = pdev;
2954 	adev->flags = flags;
2955 
2956 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2957 		adev->asic_type = amdgpu_force_asic_type;
2958 	else
2959 		adev->asic_type = flags & AMD_ASIC_MASK;
2960 
2961 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2962 	if (amdgpu_emu_mode == 1)
2963 		adev->usec_timeout *= 10;
2964 	adev->gmc.gart_size = 512 * 1024 * 1024;
2965 	adev->accel_working = false;
2966 	adev->num_rings = 0;
2967 	adev->mman.buffer_funcs = NULL;
2968 	adev->mman.buffer_funcs_ring = NULL;
2969 	adev->vm_manager.vm_pte_funcs = NULL;
2970 	adev->vm_manager.vm_pte_num_scheds = 0;
2971 	adev->gmc.gmc_funcs = NULL;
2972 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2973 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2974 
2975 	adev->smc_rreg = &amdgpu_invalid_rreg;
2976 	adev->smc_wreg = &amdgpu_invalid_wreg;
2977 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2978 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2979 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2980 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2981 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2982 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2983 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2984 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2985 	adev->didt_rreg = &amdgpu_invalid_rreg;
2986 	adev->didt_wreg = &amdgpu_invalid_wreg;
2987 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2988 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2989 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2990 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2991 
2992 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2993 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2994 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2995 
2996 	/* mutex initialization are all done here so we
2997 	 * can recall function without having locking issues */
2998 	atomic_set(&adev->irq.ih.lock, 0);
2999 	mutex_init(&adev->firmware.mutex);
3000 	mutex_init(&adev->pm.mutex);
3001 	mutex_init(&adev->gfx.gpu_clock_mutex);
3002 	mutex_init(&adev->srbm_mutex);
3003 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3004 	mutex_init(&adev->gfx.gfx_off_mutex);
3005 	mutex_init(&adev->grbm_idx_mutex);
3006 	mutex_init(&adev->mn_lock);
3007 	mutex_init(&adev->virt.vf_errors.lock);
3008 	hash_init(adev->mn_hash);
3009 	mutex_init(&adev->lock_reset);
3010 	mutex_init(&adev->psp.mutex);
3011 	mutex_init(&adev->notifier_lock);
3012 
3013 	r = amdgpu_device_check_arguments(adev);
3014 	if (r)
3015 		return r;
3016 
3017 	spin_lock_init(&adev->mmio_idx_lock);
3018 	spin_lock_init(&adev->smc_idx_lock);
3019 	spin_lock_init(&adev->pcie_idx_lock);
3020 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3021 	spin_lock_init(&adev->didt_idx_lock);
3022 	spin_lock_init(&adev->gc_cac_idx_lock);
3023 	spin_lock_init(&adev->se_cac_idx_lock);
3024 	spin_lock_init(&adev->audio_endpt_idx_lock);
3025 	spin_lock_init(&adev->mm_stats.lock);
3026 
3027 	INIT_LIST_HEAD(&adev->shadow_list);
3028 	mutex_init(&adev->shadow_list_lock);
3029 
3030 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3031 			  amdgpu_device_delayed_init_work_handler);
3032 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3033 			  amdgpu_device_delay_enable_gfx_off);
3034 
3035 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3036 
3037 	adev->gfx.gfx_off_req_count = 1;
3038 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3039 
3040 	/* Registers mapping */
3041 	/* TODO: block userspace mapping of io register */
3042 	if (adev->asic_type >= CHIP_BONAIRE) {
3043 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3044 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3045 	} else {
3046 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3047 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3048 	}
3049 
3050 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3051 	if (adev->rmmio == NULL) {
3052 		return -ENOMEM;
3053 	}
3054 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3055 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3056 
3057 	/* io port mapping */
3058 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3059 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3060 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3061 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3062 			break;
3063 		}
3064 	}
3065 	if (adev->rio_mem == NULL)
3066 		DRM_INFO("PCI I/O BAR is not found.\n");
3067 
3068 	/* enable PCIE atomic ops */
3069 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3070 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3071 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3072 	if (r) {
3073 		adev->have_atomics_support = false;
3074 		DRM_INFO("PCIE atomic ops is not supported\n");
3075 	} else {
3076 		adev->have_atomics_support = true;
3077 	}
3078 
3079 	amdgpu_device_get_pcie_info(adev);
3080 
3081 	if (amdgpu_mcbp)
3082 		DRM_INFO("MCBP is enabled\n");
3083 
3084 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3085 		adev->enable_mes = true;
3086 
3087 	/* detect hw virtualization here */
3088 	amdgpu_detect_virtualization(adev);
3089 
3090 	r = amdgpu_device_get_job_timeout_settings(adev);
3091 	if (r) {
3092 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3093 		return r;
3094 	}
3095 
3096 	/* early init functions */
3097 	r = amdgpu_device_ip_early_init(adev);
3098 	if (r)
3099 		return r;
3100 
3101 	/* doorbell bar mapping and doorbell index init*/
3102 	amdgpu_device_doorbell_init(adev);
3103 
3104 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3105 	/* this will fail for cards that aren't VGA class devices, just
3106 	 * ignore it */
3107 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3108 
3109 	if (amdgpu_device_supports_boco(ddev))
3110 		boco = true;
3111 	if (amdgpu_has_atpx() &&
3112 	    (amdgpu_is_atpx_hybrid() ||
3113 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3114 	    !pci_is_thunderbolt_attached(adev->pdev))
3115 		vga_switcheroo_register_client(adev->pdev,
3116 					       &amdgpu_switcheroo_ops, boco);
3117 	if (boco)
3118 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3119 
3120 	if (amdgpu_emu_mode == 1) {
3121 		/* post the asic on emulation mode */
3122 		emu_soc_asic_init(adev);
3123 		goto fence_driver_init;
3124 	}
3125 
3126 	/* detect if we are with an SRIOV vbios */
3127 	amdgpu_device_detect_sriov_bios(adev);
3128 
3129 	/* check if we need to reset the asic
3130 	 *  E.g., driver was not cleanly unloaded previously, etc.
3131 	 */
3132 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3133 		r = amdgpu_asic_reset(adev);
3134 		if (r) {
3135 			dev_err(adev->dev, "asic reset on init failed\n");
3136 			goto failed;
3137 		}
3138 	}
3139 
3140 	/* Post card if necessary */
3141 	if (amdgpu_device_need_post(adev)) {
3142 		if (!adev->bios) {
3143 			dev_err(adev->dev, "no vBIOS found\n");
3144 			r = -EINVAL;
3145 			goto failed;
3146 		}
3147 		DRM_INFO("GPU posting now...\n");
3148 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3149 		if (r) {
3150 			dev_err(adev->dev, "gpu post error!\n");
3151 			goto failed;
3152 		}
3153 	}
3154 
3155 	if (adev->is_atom_fw) {
3156 		/* Initialize clocks */
3157 		r = amdgpu_atomfirmware_get_clock_info(adev);
3158 		if (r) {
3159 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3160 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3161 			goto failed;
3162 		}
3163 	} else {
3164 		/* Initialize clocks */
3165 		r = amdgpu_atombios_get_clock_info(adev);
3166 		if (r) {
3167 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3168 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3169 			goto failed;
3170 		}
3171 		/* init i2c buses */
3172 		if (!amdgpu_device_has_dc_support(adev))
3173 			amdgpu_atombios_i2c_init(adev);
3174 	}
3175 
3176 fence_driver_init:
3177 	/* Fence driver */
3178 	r = amdgpu_fence_driver_init(adev);
3179 	if (r) {
3180 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3181 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3182 		goto failed;
3183 	}
3184 
3185 	/* init the mode config */
3186 	drm_mode_config_init(adev->ddev);
3187 
3188 	r = amdgpu_device_ip_init(adev);
3189 	if (r) {
3190 		/* failed in exclusive mode due to timeout */
3191 		if (amdgpu_sriov_vf(adev) &&
3192 		    !amdgpu_sriov_runtime(adev) &&
3193 		    amdgpu_virt_mmio_blocked(adev) &&
3194 		    !amdgpu_virt_wait_reset(adev)) {
3195 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3196 			/* Don't send request since VF is inactive. */
3197 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3198 			adev->virt.ops = NULL;
3199 			r = -EAGAIN;
3200 			goto failed;
3201 		}
3202 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3203 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3204 		goto failed;
3205 	}
3206 
3207 	dev_info(adev->dev,
3208 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3209 			adev->gfx.config.max_shader_engines,
3210 			adev->gfx.config.max_sh_per_se,
3211 			adev->gfx.config.max_cu_per_sh,
3212 			adev->gfx.cu_info.number);
3213 
3214 	adev->accel_working = true;
3215 
3216 	amdgpu_vm_check_compute_bug(adev);
3217 
3218 	/* Initialize the buffer migration limit. */
3219 	if (amdgpu_moverate >= 0)
3220 		max_MBps = amdgpu_moverate;
3221 	else
3222 		max_MBps = 8; /* Allow 8 MB/s. */
3223 	/* Get a log2 for easy divisions. */
3224 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3225 
3226 	amdgpu_fbdev_init(adev);
3227 
3228 	r = amdgpu_pm_sysfs_init(adev);
3229 	if (r) {
3230 		adev->pm_sysfs_en = false;
3231 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3232 	} else
3233 		adev->pm_sysfs_en = true;
3234 
3235 	r = amdgpu_ucode_sysfs_init(adev);
3236 	if (r) {
3237 		adev->ucode_sysfs_en = false;
3238 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3239 	} else
3240 		adev->ucode_sysfs_en = true;
3241 
3242 	if ((amdgpu_testing & 1)) {
3243 		if (adev->accel_working)
3244 			amdgpu_test_moves(adev);
3245 		else
3246 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3247 	}
3248 	if (amdgpu_benchmarking) {
3249 		if (adev->accel_working)
3250 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3251 		else
3252 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3253 	}
3254 
3255 	/*
3256 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3257 	 * Otherwise the mgpu fan boost feature will be skipped due to the
3258 	 * gpu instance is counted less.
3259 	 */
3260 	amdgpu_register_gpu_instance(adev);
3261 
3262 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3263 	 * explicit gating rather than handling it automatically.
3264 	 */
3265 	r = amdgpu_device_ip_late_init(adev);
3266 	if (r) {
3267 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3268 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3269 		goto failed;
3270 	}
3271 
3272 	/* must succeed. */
3273 	amdgpu_ras_resume(adev);
3274 
3275 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3276 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3277 
3278 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3279 	if (r) {
3280 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3281 		return r;
3282 	}
3283 
3284 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3285 		r = amdgpu_pmu_init(adev);
3286 	if (r)
3287 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3288 
3289 	return 0;
3290 
3291 failed:
3292 	amdgpu_vf_error_trans_all(adev);
3293 	if (boco)
3294 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3295 
3296 	return r;
3297 }
3298 
3299 /**
3300  * amdgpu_device_fini - tear down the driver
3301  *
3302  * @adev: amdgpu_device pointer
3303  *
3304  * Tear down the driver info (all asics).
3305  * Called at driver shutdown.
3306  */
void amdgpu_device_fini(struct amdgpu_device *adev)
{
	int r;

	DRM_INFO("amdgpu: finishing device.\n");
	/* let any pending delayed-init work (IB tests etc.) finish first */
	flush_delayed_work(&adev->delayed_init_work);
	adev->shutdown = true;

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_request_full_gpu(adev, false);

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized){
		/* shut the display pipes down before tearing down the IPs;
		 * DC uses the atomic helper, pre-DC the legacy helper */
		if (!amdgpu_device_has_dc_support(adev))
			drm_helper_force_disable_all(adev->ddev);
		else
			drm_atomic_helper_shutdown(adev->ddev);
	}
	amdgpu_fence_driver_fini(adev);
	if (adev->pm_sysfs_en)
		amdgpu_pm_sysfs_fini(adev);
	amdgpu_fbdev_fini(adev);
	/* return value intentionally ignored: teardown continues regardless */
	r = amdgpu_device_ip_fini(adev);
	if (adev->firmware.gpu_info_fw) {
		release_firmware(adev->firmware.gpu_info_fw);
		adev->firmware.gpu_info_fw = NULL;
	}
	adev->accel_working = false;
	/* free i2c buses */
	if (!amdgpu_device_has_dc_support(adev))
		amdgpu_i2c_fini(adev);

	/* emulation mode skipped atombios init, so skip its teardown too */
	if (amdgpu_emu_mode != 1)
		amdgpu_atombios_fini(adev);

	kfree(adev->bios);
	adev->bios = NULL;
	/* unregister from vga_switcheroo only under the same conditions that
	 * registered us in amdgpu_device_init */
	if (amdgpu_has_atpx() &&
	    (amdgpu_is_atpx_hybrid() ||
	     amdgpu_has_atpx_dgpu_power_cntl()) &&
	    !pci_is_thunderbolt_attached(adev->pdev))
		vga_switcheroo_unregister_client(adev->pdev);
	if (amdgpu_device_supports_boco(adev->ddev))
		vga_switcheroo_fini_domain_pm_ops(adev->dev);
	/* drop the VGA decode callback registered at init */
	vga_client_register(adev->pdev, NULL, NULL, NULL);
	if (adev->rio_mem)
		pci_iounmap(adev->pdev, adev->rio_mem);
	adev->rio_mem = NULL;
	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	amdgpu_device_doorbell_fini(adev);

	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);

	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_fini(adev);
}
3372 
3373 
3374 /*
3375  * Suspend & resume.
3376  */
3377 /**
3378  * amdgpu_device_suspend - initiate device suspend
3379  *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of suspend
3383  *
3384  * Puts the hw in the suspend state (all asics).
3385  * Returns 0 for success or an error on failure.
3386  * Called at driver suspend.
3387  */
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev;
	struct drm_crtc *crtc;
	struct drm_connector *connector;
	struct drm_connector_list_iter iter;
	int r;

	if (dev == NULL || dev->dev_private == NULL) {
		return -ENODEV;
	}

	adev = dev->dev_private;

	/* nothing to do if switcheroo already powered the GPU off */
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	adev->in_suspend = true;
	drm_kms_helper_poll_disable(dev);

	if (fbcon)
		amdgpu_fbdev_set_suspend(adev, 1);

	/* delayed init work (IB tests etc.) may still be queued;
	 * make sure it is not running while we suspend */
	cancel_delayed_work_sync(&adev->delayed_init_work);

	if (!amdgpu_device_has_dc_support(adev)) {
		/* turn off display hw */
		drm_modeset_lock_all(dev);
		drm_connector_list_iter_begin(dev, &iter);
		drm_for_each_connector_iter(connector, &iter)
			drm_helper_connector_dpms(connector,
						  DRM_MODE_DPMS_OFF);
		drm_connector_list_iter_end(&iter);
		drm_modeset_unlock_all(dev);
			/* unpin the front buffers and cursors */
		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
			struct drm_framebuffer *fb = crtc->primary->fb;
			struct amdgpu_bo *robj;

			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
				/* unpin errors are not fatal for suspend; just skip */
				r = amdgpu_bo_reserve(aobj, true);
				if (r == 0) {
					amdgpu_bo_unpin(aobj);
					amdgpu_bo_unreserve(aobj);
				}
			}

			if (fb == NULL || fb->obj[0] == NULL) {
				continue;
			}
			robj = gem_to_amdgpu_bo(fb->obj[0]);
			/* don't unpin kernel fb objects */
			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
				r = amdgpu_bo_reserve(robj, true);
				if (r == 0) {
					amdgpu_bo_unpin(robj);
					amdgpu_bo_unreserve(robj);
				}
			}
		}
	}

	amdgpu_ras_suspend(adev);

	/* suspend return codes are intentionally ignored here; the
	 * function always reports success to the caller below */
	r = amdgpu_device_ip_suspend_phase1(adev);

	amdgpu_amdkfd_suspend(adev, !fbcon);

	/* evict vram memory */
	amdgpu_bo_evict_vram(adev);

	amdgpu_fence_driver_suspend(adev);

	r = amdgpu_device_ip_suspend_phase2(adev);

	/* evict remaining vram memory
	 * This second call to evict vram is to evict the gart page table
	 * using the CPU.
	 */
	amdgpu_bo_evict_vram(adev);

	return 0;
}
3473 
3474 /**
3475  * amdgpu_device_resume - initiate device resume
3476  *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of resume
3480  *
3481  * Bring the hw back to operating state (all asics).
3482  * Returns 0 for success or an error on failure.
3483  * Called at driver resume.
3484  */
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
	struct drm_connector *connector;
	struct drm_connector_list_iter iter;
	struct amdgpu_device *adev = dev->dev_private;
	struct drm_crtc *crtc;
	int r = 0;

	/* nothing to do if switcheroo has the GPU powered off */
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		/* posting failure is logged but not fatal; IP resume below
		 * may still succeed */
		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
		if (r)
			DRM_ERROR("amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);
	if (r) {
		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
		return r;
	}
	amdgpu_fence_driver_resume(adev);


	r = amdgpu_device_ip_late_init(adev);
	if (r)
		return r;

	/* kick off the deferred init work (IB tests etc.) again */
	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	if (!amdgpu_device_has_dc_support(adev)) {
		/* pin cursors */
		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
				r = amdgpu_bo_reserve(aobj, true);
				if (r == 0) {
					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
					if (r != 0)
						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
					amdgpu_bo_unreserve(aobj);
				}
			}
		}
	}
	r = amdgpu_amdkfd_resume(adev, !fbcon);
	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	/* blat the mode back in */
	if (fbcon) {
		if (!amdgpu_device_has_dc_support(adev)) {
			/* pre DCE11 */
			drm_helper_resume_force_mode(dev);

			/* turn on display hw */
			drm_modeset_lock_all(dev);

			drm_connector_list_iter_begin(dev, &iter);
			drm_for_each_connector_iter(connector, &iter)
				drm_helper_connector_dpms(connector,
							  DRM_MODE_DPMS_ON);
			drm_connector_list_iter_end(&iter);

			drm_modeset_unlock_all(dev);
		}
		amdgpu_fbdev_set_suspend(adev, 0);
	}

	drm_kms_helper_poll_enable(dev);

	amdgpu_ras_resume(adev);

	/*
	 * Most of the connector probing functions try to acquire runtime pm
	 * refs to ensure that the GPU is powered on when connector polling is
	 * performed. Since we're calling this from a runtime PM callback,
	 * trying to acquire rpm refs will cause us to deadlock.
	 *
	 * Since we're guaranteed to be holding the rpm lock, it's safe to
	 * temporarily disable the rpm helpers so this doesn't deadlock us.
	 */
#ifdef CONFIG_PM
	dev->dev->power.disable_depth++;
#endif
	if (!amdgpu_device_has_dc_support(adev))
		drm_helper_hpd_irq_event(dev);
	else
		drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
	dev->dev->power.disable_depth--;
#endif
	adev->in_suspend = false;

	return 0;
}
3590 
3591 /**
3592  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3593  *
3594  * @adev: amdgpu_device pointer
3595  *
3596  * The list of all the hardware IPs that make up the asic is walked and
3597  * the check_soft_reset callbacks are run.  check_soft_reset determines
3598  * if the asic is still hung or not.
3599  * Returns true if any of the IPs are still in a hung state, false if not.
3600  */
3601 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3602 {
3603 	int i;
3604 	bool asic_hang = false;
3605 
3606 	if (amdgpu_sriov_vf(adev))
3607 		return true;
3608 
3609 	if (amdgpu_asic_need_full_reset(adev))
3610 		return true;
3611 
3612 	for (i = 0; i < adev->num_ip_blocks; i++) {
3613 		if (!adev->ip_blocks[i].status.valid)
3614 			continue;
3615 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3616 			adev->ip_blocks[i].status.hang =
3617 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3618 		if (adev->ip_blocks[i].status.hang) {
3619 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3620 			asic_hang = true;
3621 		}
3622 	}
3623 	return asic_hang;
3624 }
3625 
3626 /**
3627  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3628  *
3629  * @adev: amdgpu_device pointer
3630  *
3631  * The list of all the hardware IPs that make up the asic is walked and the
3632  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3633  * handles any IP specific hardware or software state changes that are
3634  * necessary for a soft reset to succeed.
3635  * Returns 0 on success, negative error code on failure.
3636  */
3637 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3638 {
3639 	int i, r = 0;
3640 
3641 	for (i = 0; i < adev->num_ip_blocks; i++) {
3642 		if (!adev->ip_blocks[i].status.valid)
3643 			continue;
3644 		if (adev->ip_blocks[i].status.hang &&
3645 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3646 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3647 			if (r)
3648 				return r;
3649 		}
3650 	}
3651 
3652 	return 0;
3653 }
3654 
3655 /**
3656  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3657  *
3658  * @adev: amdgpu_device pointer
3659  *
3660  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3661  * reset is necessary to recover.
3662  * Returns true if a full asic reset is required, false if not.
3663  */
3664 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3665 {
3666 	int i;
3667 
3668 	if (amdgpu_asic_need_full_reset(adev))
3669 		return true;
3670 
3671 	for (i = 0; i < adev->num_ip_blocks; i++) {
3672 		if (!adev->ip_blocks[i].status.valid)
3673 			continue;
3674 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3675 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3676 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3677 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3678 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3679 			if (adev->ip_blocks[i].status.hang) {
3680 				DRM_INFO("Some block need full reset!\n");
3681 				return true;
3682 			}
3683 		}
3684 	}
3685 	return false;
3686 }
3687 
3688 /**
3689  * amdgpu_device_ip_soft_reset - do a soft reset
3690  *
3691  * @adev: amdgpu_device pointer
3692  *
3693  * The list of all the hardware IPs that make up the asic is walked and the
3694  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3695  * IP specific hardware or software state changes that are necessary to soft
3696  * reset the IP.
3697  * Returns 0 on success, negative error code on failure.
3698  */
3699 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3700 {
3701 	int i, r = 0;
3702 
3703 	for (i = 0; i < adev->num_ip_blocks; i++) {
3704 		if (!adev->ip_blocks[i].status.valid)
3705 			continue;
3706 		if (adev->ip_blocks[i].status.hang &&
3707 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3708 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3709 			if (r)
3710 				return r;
3711 		}
3712 	}
3713 
3714 	return 0;
3715 }
3716 
3717 /**
3718  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3719  *
3720  * @adev: amdgpu_device pointer
3721  *
3722  * The list of all the hardware IPs that make up the asic is walked and the
3723  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3724  * handles any IP specific hardware or software state changes that are
3725  * necessary after the IP has been soft reset.
3726  * Returns 0 on success, negative error code on failure.
3727  */
3728 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3729 {
3730 	int i, r = 0;
3731 
3732 	for (i = 0; i < adev->num_ip_blocks; i++) {
3733 		if (!adev->ip_blocks[i].status.valid)
3734 			continue;
3735 		if (adev->ip_blocks[i].status.hang &&
3736 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3737 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3738 		if (r)
3739 			return r;
3740 	}
3741 
3742 	return 0;
3743 }
3744 
3745 /**
3746  * amdgpu_device_recover_vram - Recover some VRAM contents
3747  *
3748  * @adev: amdgpu_device pointer
3749  *
3750  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3751  * restore things like GPUVM page tables after a GPU reset where
3752  * the contents of VRAM might be lost.
3753  *
3754  * Returns:
3755  * 0 on success, negative error code on failure.
3756  */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
	struct dma_fence *fence = NULL, *next = NULL;
	struct amdgpu_bo *shadow;
	long r = 1, tmo;

	/* SR-IOV runtime VFs share the GPU with other VFs, so allow the
	 * restore copies a much longer timeout budget */
	if (amdgpu_sriov_runtime(adev))
		tmo = msecs_to_jiffies(8000);
	else
		tmo = msecs_to_jiffies(100);

	DRM_INFO("recover vram bo from shadow start\n");
	mutex_lock(&adev->shadow_list_lock);
	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {

		/* No need to recover an evicted BO */
		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
			continue;

		r = amdgpu_bo_restore_shadow(shadow, &next);
		if (r)
			break;

		/* Pipeline the restores: while the current copy is in
		 * flight, wait on the previous one's fence, carrying the
		 * remaining timeout budget (tmo) across iterations.
		 * dma_fence_wait_timeout returns 0 on timeout, <0 on error,
		 * otherwise the jiffies left. */
		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);
			fence = next;
			if (tmo == 0) {
				r = -ETIMEDOUT;
				break;
			} else if (tmo < 0) {
				r = tmo;
				break;
			}
		} else {
			fence = next;
		}
	}
	mutex_unlock(&adev->shadow_list_lock);

	/* wait for the last outstanding restore to complete */
	if (fence)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);

	if (r < 0 || tmo <= 0) {
		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
		return -EIO;
	}

	DRM_INFO("recover vram bo from shadow done\n");
	return 0;
}
3811 
3812 
3813 /**
3814  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3815  *
3816  * @adev: amdgpu device pointer
3817  * @from_hypervisor: request from hypervisor
3818  *
3819  * do VF FLR and reinitialize Asic
3820  * return 0 means succeeded otherwise failed
3821  */
3822 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3823 				     bool from_hypervisor)
3824 {
3825 	int r;
3826 
3827 	if (from_hypervisor)
3828 		r = amdgpu_virt_request_full_gpu(adev, true);
3829 	else
3830 		r = amdgpu_virt_reset_gpu(adev);
3831 	if (r)
3832 		return r;
3833 
3834 	amdgpu_amdkfd_pre_reset(adev);
3835 
3836 	/* Resume IP prior to SMC */
3837 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3838 	if (r)
3839 		goto error;
3840 
3841 	amdgpu_virt_init_data_exchange(adev);
3842 	/* we need recover gart prior to run SMC/CP/SDMA resume */
3843 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3844 
3845 	r = amdgpu_device_fw_loading(adev);
3846 	if (r)
3847 		return r;
3848 
3849 	/* now we are okay to resume SMC/CP/SDMA */
3850 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3851 	if (r)
3852 		goto error;
3853 
3854 	amdgpu_irq_gpu_reset_resume_helper(adev);
3855 	r = amdgpu_ib_ring_tests(adev);
3856 	amdgpu_amdkfd_post_reset(adev);
3857 
3858 error:
3859 	amdgpu_virt_release_full_gpu(adev, true);
3860 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3861 		amdgpu_inc_vram_lost(adev);
3862 		r = amdgpu_device_recover_vram(adev);
3863 	}
3864 
3865 	return r;
3866 }
3867 
3868 /**
3869  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3870  *
3871  * @adev: amdgpu device pointer
3872  *
3873  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3874  * a hung GPU.
3875  */
3876 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3877 {
3878 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3879 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3880 		return false;
3881 	}
3882 
3883 	if (amdgpu_gpu_recovery == 0)
3884 		goto disabled;
3885 
3886 	if (amdgpu_sriov_vf(adev))
3887 		return true;
3888 
3889 	if (amdgpu_gpu_recovery == -1) {
3890 		switch (adev->asic_type) {
3891 		case CHIP_BONAIRE:
3892 		case CHIP_HAWAII:
3893 		case CHIP_TOPAZ:
3894 		case CHIP_TONGA:
3895 		case CHIP_FIJI:
3896 		case CHIP_POLARIS10:
3897 		case CHIP_POLARIS11:
3898 		case CHIP_POLARIS12:
3899 		case CHIP_VEGAM:
3900 		case CHIP_VEGA20:
3901 		case CHIP_VEGA10:
3902 		case CHIP_VEGA12:
3903 		case CHIP_RAVEN:
3904 		case CHIP_ARCTURUS:
3905 		case CHIP_RENOIR:
3906 		case CHIP_NAVI10:
3907 		case CHIP_NAVI14:
3908 		case CHIP_NAVI12:
3909 			break;
3910 		default:
3911 			goto disabled;
3912 		}
3913 	}
3914 
3915 	return true;
3916 
3917 disabled:
3918 		DRM_INFO("GPU recovery disabled.\n");
3919 		return false;
3920 }
3921 
3922 
/* Prepare the ASIC for reset: force-complete outstanding hw fences,
 * punish the offending job, and on bare metal attempt a per-IP soft
 * reset first, escalating to a full reset (with IP suspend) if the soft
 * reset fails or any block is still hung.  *need_full_reset_arg is
 * updated with the final decision (bare metal only).
 */
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset  = *need_full_reset_arg;

	amdgpu_debugfs_wait_dump(adev);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	/* bump the guilty job's karma so repeat offenders get dropped */
	if(job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			/* try the lighter per-IP soft reset first; fall back
			 * to a full ASIC reset if it fails or any block
			 * remains hung afterwards */
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}
3970 
3971 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3972 			       struct list_head *device_list_handle,
3973 			       bool *need_full_reset_arg)
3974 {
3975 	struct amdgpu_device *tmp_adev = NULL;
3976 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3977 	int r = 0;
3978 
3979 	/*
3980 	 * ASIC reset has to be done on all HGMI hive nodes ASAP
3981 	 * to allow proper links negotiation in FW (within 1 sec)
3982 	 */
3983 	if (need_full_reset) {
3984 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3985 			/* For XGMI run all resets in parallel to speed up the process */
3986 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3987 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
3988 					r = -EALREADY;
3989 			} else
3990 				r = amdgpu_asic_reset(tmp_adev);
3991 
3992 			if (r) {
3993 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3994 					 r, tmp_adev->ddev->unique);
3995 				break;
3996 			}
3997 		}
3998 
3999 		/* For XGMI wait for all resets to complete before proceed */
4000 		if (!r) {
4001 			list_for_each_entry(tmp_adev, device_list_handle,
4002 					    gmc.xgmi.head) {
4003 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4004 					flush_work(&tmp_adev->xgmi_reset_work);
4005 					r = tmp_adev->asic_reset_res;
4006 					if (r)
4007 						break;
4008 				}
4009 			}
4010 		}
4011 	}
4012 
4013 	if (!r && amdgpu_ras_intr_triggered()) {
4014 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4015 			if (tmp_adev->mmhub.funcs &&
4016 			    tmp_adev->mmhub.funcs->reset_ras_error_count)
4017 				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4018 		}
4019 
4020 		amdgpu_ras_intr_cleared();
4021 	}
4022 
4023 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4024 		if (need_full_reset) {
4025 			/* post card */
4026 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4027 				DRM_WARN("asic atom init failed!");
4028 
4029 			if (!r) {
4030 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4031 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4032 				if (r)
4033 					goto out;
4034 
4035 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4036 				if (vram_lost) {
4037 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4038 					amdgpu_inc_vram_lost(tmp_adev);
4039 				}
4040 
4041 				r = amdgpu_gtt_mgr_recover(
4042 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
4043 				if (r)
4044 					goto out;
4045 
4046 				r = amdgpu_device_fw_loading(tmp_adev);
4047 				if (r)
4048 					return r;
4049 
4050 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4051 				if (r)
4052 					goto out;
4053 
4054 				if (vram_lost)
4055 					amdgpu_device_fill_reset_magic(tmp_adev);
4056 
4057 				/*
4058 				 * Add this ASIC as tracked as reset was already
4059 				 * complete successfully.
4060 				 */
4061 				amdgpu_register_gpu_instance(tmp_adev);
4062 
4063 				r = amdgpu_device_ip_late_init(tmp_adev);
4064 				if (r)
4065 					goto out;
4066 
4067 				amdgpu_fbdev_set_suspend(tmp_adev, 0);
4068 
4069 				/* must succeed. */
4070 				amdgpu_ras_resume(tmp_adev);
4071 
4072 				/* Update PSP FW topology after reset */
4073 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4074 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4075 			}
4076 		}
4077 
4078 
4079 out:
4080 		if (!r) {
4081 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4082 			r = amdgpu_ib_ring_tests(tmp_adev);
4083 			if (r) {
4084 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4085 				r = amdgpu_device_ip_suspend(tmp_adev);
4086 				need_full_reset = true;
4087 				r = -EAGAIN;
4088 				goto end;
4089 			}
4090 		}
4091 
4092 		if (!r)
4093 			r = amdgpu_device_recover_vram(tmp_adev);
4094 		else
4095 			tmp_adev->asic_reset_res = r;
4096 	}
4097 
4098 end:
4099 	*need_full_reset_arg = need_full_reset;
4100 	return r;
4101 }
4102 
4103 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
4104 {
4105 	if (trylock) {
4106 		if (!mutex_trylock(&adev->lock_reset))
4107 			return false;
4108 	} else
4109 		mutex_lock(&adev->lock_reset);
4110 
4111 	atomic_inc(&adev->gpu_reset_counter);
4112 	adev->in_gpu_reset = true;
4113 	switch (amdgpu_asic_reset_method(adev)) {
4114 	case AMD_RESET_METHOD_MODE1:
4115 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4116 		break;
4117 	case AMD_RESET_METHOD_MODE2:
4118 		adev->mp1_state = PP_MP1_STATE_RESET;
4119 		break;
4120 	default:
4121 		adev->mp1_state = PP_MP1_STATE_NONE;
4122 		break;
4123 	}
4124 
4125 	return true;
4126 }
4127 
/*
 * amdgpu_device_unlock_adev - counterpart of amdgpu_device_lock_adev()
 *
 * Flushes pending VF error messages, then clears the MP1 state and
 * in_gpu_reset flag set by the lock path.  The mutex is dropped last so
 * other contenders observe consistent state while the lock is held.
 */
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = false;
	mutex_unlock(&adev->lock_reset);
}
4135 
4136 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4137 {
4138 	struct pci_dev *p = NULL;
4139 
4140 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4141 			adev->pdev->bus->number, 1);
4142 	if (p) {
4143 		pm_runtime_enable(&(p->dev));
4144 		pm_runtime_resume(&(p->dev));
4145 	}
4146 }
4147 
4148 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4149 {
4150 	enum amd_reset_method reset_method;
4151 	struct pci_dev *p = NULL;
4152 	u64 expires;
4153 
4154 	/*
4155 	 * For now, only BACO and mode1 reset are confirmed
4156 	 * to suffer the audio issue without proper suspended.
4157 	 */
4158 	reset_method = amdgpu_asic_reset_method(adev);
4159 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
4160 	     (reset_method != AMD_RESET_METHOD_MODE1))
4161 		return -EINVAL;
4162 
4163 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4164 			adev->pdev->bus->number, 1);
4165 	if (!p)
4166 		return -ENODEV;
4167 
4168 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
4169 	if (!expires)
4170 		/*
4171 		 * If we cannot get the audio device autosuspend delay,
4172 		 * a fixed 4S interval will be used. Considering 3S is
4173 		 * the audio controller default autosuspend delay setting.
4174 		 * 4S used here is guaranteed to cover that.
4175 		 */
4176 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4177 
4178 	while (!pm_runtime_status_suspended(&(p->dev))) {
4179 		if (!pm_runtime_suspend(&(p->dev)))
4180 			break;
4181 
4182 		if (expires < ktime_get_mono_fast_ns()) {
4183 			dev_warn(adev->dev, "failed to suspend display audio\n");
4184 			/* TODO: abort the succeeding gpu reset? */
4185 			return -ETIMEDOUT;
4186 		}
4187 	}
4188 
4189 	pm_runtime_disable(&(p->dev));
4190 
4191 	return 0;
4192 }
4193 
4194 /**
4195  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4196  *
4197  * @adev: amdgpu device pointer
4198  * @job: which job trigger hang
4199  *
4200  * Attempt to reset the GPU if it has hung (all asics).
4201  * Attempt to do soft-reset or full-reset and reinitialize Asic
4202  * Returns 0 for success or an error on failure.
4203  */
4204 
4205 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4206 			      struct amdgpu_job *job)
4207 {
4208 	struct list_head device_list, *device_list_handle =  NULL;
4209 	bool need_full_reset = false;
4210 	bool job_signaled = false;
4211 	struct amdgpu_hive_info *hive = NULL;
4212 	struct amdgpu_device *tmp_adev = NULL;
4213 	int i, r = 0;
4214 	bool in_ras_intr = amdgpu_ras_intr_triggered();
4215 	bool use_baco =
4216 		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4217 		true : false;
4218 	bool audio_suspended = false;
4219 
4220 	/*
4221 	 * Flush RAM to disk so that after reboot
4222 	 * the user can read log and see why the system rebooted.
4223 	 */
4224 	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4225 
4226 		DRM_WARN("Emergency reboot.");
4227 
4228 		ksys_sync_helper();
4229 		emergency_restart();
4230 	}
4231 
4232 	dev_info(adev->dev, "GPU %s begin!\n",
4233 		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
4234 
4235 	/*
4236 	 * Here we trylock to avoid chain of resets executing from
4237 	 * either trigger by jobs on different adevs in XGMI hive or jobs on
4238 	 * different schedulers for same device while this TO handler is running.
4239 	 * We always reset all schedulers for device and all devices for XGMI
4240 	 * hive so that should take care of them too.
4241 	 */
4242 	hive = amdgpu_get_xgmi_hive(adev, true);
4243 	if (hive && !mutex_trylock(&hive->reset_lock)) {
4244 		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4245 			  job ? job->base.id : -1, hive->hive_id);
4246 		mutex_unlock(&hive->hive_lock);
4247 		return 0;
4248 	}
4249 
4250 	/*
4251 	 * Build list of devices to reset.
4252 	 * In case we are in XGMI hive mode, resort the device list
4253 	 * to put adev in the 1st position.
4254 	 */
4255 	INIT_LIST_HEAD(&device_list);
4256 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4257 		if (!hive)
4258 			return -ENODEV;
4259 		if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4260 			list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4261 		device_list_handle = &hive->device_list;
4262 	} else {
4263 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
4264 		device_list_handle = &device_list;
4265 	}
4266 
4267 	/* block all schedulers and reset given job's ring */
4268 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4269 		if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4270 			DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4271 				  job ? job->base.id : -1);
4272 			mutex_unlock(&hive->hive_lock);
4273 			return 0;
4274 		}
4275 
4276 		/*
4277 		 * Try to put the audio codec into suspend state
4278 		 * before gpu reset started.
4279 		 *
4280 		 * Due to the power domain of the graphics device
4281 		 * is shared with AZ power domain. Without this,
4282 		 * we may change the audio hardware from behind
4283 		 * the audio driver's back. That will trigger
4284 		 * some audio codec errors.
4285 		 */
4286 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
4287 			audio_suspended = true;
4288 
4289 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
4290 
4291 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4292 
4293 		if (!amdgpu_sriov_vf(tmp_adev))
4294 			amdgpu_amdkfd_pre_reset(tmp_adev);
4295 
4296 		/*
4297 		 * Mark these ASICs to be reseted as untracked first
4298 		 * And add them back after reset completed
4299 		 */
4300 		amdgpu_unregister_gpu_instance(tmp_adev);
4301 
4302 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
4303 
4304 		/* disable ras on ALL IPs */
4305 		if (!(in_ras_intr && !use_baco) &&
4306 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4307 			amdgpu_ras_suspend(tmp_adev);
4308 
4309 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4310 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4311 
4312 			if (!ring || !ring->sched.thread)
4313 				continue;
4314 
4315 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4316 
4317 			if (in_ras_intr && !use_baco)
4318 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4319 		}
4320 	}
4321 
4322 	if (in_ras_intr && !use_baco)
4323 		goto skip_sched_resume;
4324 
4325 	/*
4326 	 * Must check guilty signal here since after this point all old
4327 	 * HW fences are force signaled.
4328 	 *
4329 	 * job->base holds a reference to parent fence
4330 	 */
4331 	if (job && job->base.s_fence->parent &&
4332 	    dma_fence_is_signaled(job->base.s_fence->parent)) {
4333 		job_signaled = true;
4334 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4335 		goto skip_hw_reset;
4336 	}
4337 
4338 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4339 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4340 		r = amdgpu_device_pre_asic_reset(tmp_adev,
4341 						 NULL,
4342 						 &need_full_reset);
4343 		/*TODO Should we stop ?*/
4344 		if (r) {
4345 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4346 				  r, tmp_adev->ddev->unique);
4347 			tmp_adev->asic_reset_res = r;
4348 		}
4349 	}
4350 
4351 	/* Actual ASIC resets if needed.*/
4352 	/* TODO Implement XGMI hive reset logic for SRIOV */
4353 	if (amdgpu_sriov_vf(adev)) {
4354 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4355 		if (r)
4356 			adev->asic_reset_res = r;
4357 	} else {
4358 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4359 		if (r && r == -EAGAIN)
4360 			goto retry;
4361 	}
4362 
4363 skip_hw_reset:
4364 
4365 	/* Post ASIC reset for all devs .*/
4366 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4367 
4368 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4369 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4370 
4371 			if (!ring || !ring->sched.thread)
4372 				continue;
4373 
4374 			/* No point to resubmit jobs if we didn't HW reset*/
4375 			if (!tmp_adev->asic_reset_res && !job_signaled)
4376 				drm_sched_resubmit_jobs(&ring->sched);
4377 
4378 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4379 		}
4380 
4381 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4382 			drm_helper_resume_force_mode(tmp_adev->ddev);
4383 		}
4384 
4385 		tmp_adev->asic_reset_res = 0;
4386 
4387 		if (r) {
4388 			/* bad news, how to tell it to userspace ? */
4389 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4390 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4391 		} else {
4392 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4393 		}
4394 	}
4395 
4396 skip_sched_resume:
4397 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4398 		/*unlock kfd: SRIOV would do it separately */
4399 		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
4400 	                amdgpu_amdkfd_post_reset(tmp_adev);
4401 		if (audio_suspended)
4402 			amdgpu_device_resume_display_audio(tmp_adev);
4403 		amdgpu_device_unlock_adev(tmp_adev);
4404 	}
4405 
4406 	if (hive) {
4407 		mutex_unlock(&hive->reset_lock);
4408 		mutex_unlock(&hive->hive_lock);
4409 	}
4410 
4411 	if (r)
4412 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4413 	return r;
4414 }
4415 
4416 /**
4417  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
4418  *
4419  * @adev: amdgpu_device pointer
4420  *
4421  * Fetchs and stores in the driver the PCIE capabilities (gen speed
4422  * and lanes) of the slot the device is in. Handles APUs and
4423  * virtualized environments where PCIE config space may not be available.
4424  */
4425 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4426 {
4427 	struct pci_dev *pdev;
4428 	enum pci_bus_speed speed_cap, platform_speed_cap;
4429 	enum pcie_link_width platform_link_width;
4430 
4431 	if (amdgpu_pcie_gen_cap)
4432 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4433 
4434 	if (amdgpu_pcie_lane_cap)
4435 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4436 
4437 	/* covers APUs as well */
4438 	if (pci_is_root_bus(adev->pdev->bus)) {
4439 		if (adev->pm.pcie_gen_mask == 0)
4440 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4441 		if (adev->pm.pcie_mlw_mask == 0)
4442 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4443 		return;
4444 	}
4445 
4446 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4447 		return;
4448 
4449 	pcie_bandwidth_available(adev->pdev, NULL,
4450 				 &platform_speed_cap, &platform_link_width);
4451 
4452 	if (adev->pm.pcie_gen_mask == 0) {
4453 		/* asic caps */
4454 		pdev = adev->pdev;
4455 		speed_cap = pcie_get_speed_cap(pdev);
4456 		if (speed_cap == PCI_SPEED_UNKNOWN) {
4457 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4458 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4459 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4460 		} else {
4461 			if (speed_cap == PCIE_SPEED_16_0GT)
4462 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4463 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4464 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4465 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4466 			else if (speed_cap == PCIE_SPEED_8_0GT)
4467 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4468 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4469 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4470 			else if (speed_cap == PCIE_SPEED_5_0GT)
4471 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4472 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4473 			else
4474 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4475 		}
4476 		/* platform caps */
4477 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4478 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4479 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4480 		} else {
4481 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4482 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4483 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4484 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4485 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4486 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4487 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4488 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4489 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4490 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4491 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4492 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4493 			else
4494 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4495 
4496 		}
4497 	}
4498 	if (adev->pm.pcie_mlw_mask == 0) {
4499 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4500 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4501 		} else {
4502 			switch (platform_link_width) {
4503 			case PCIE_LNK_X32:
4504 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4505 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4506 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4507 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4508 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4509 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4510 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4511 				break;
4512 			case PCIE_LNK_X16:
4513 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4514 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4515 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4516 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4517 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4518 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4519 				break;
4520 			case PCIE_LNK_X12:
4521 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4522 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4523 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4524 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4525 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4526 				break;
4527 			case PCIE_LNK_X8:
4528 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4529 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4530 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4531 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4532 				break;
4533 			case PCIE_LNK_X4:
4534 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4535 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4536 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4537 				break;
4538 			case PCIE_LNK_X2:
4539 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4540 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4541 				break;
4542 			case PCIE_LNK_X1:
4543 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4544 				break;
4545 			default:
4546 				break;
4547 			}
4548 		}
4549 	}
4550 }
4551 
4552 int amdgpu_device_baco_enter(struct drm_device *dev)
4553 {
4554 	struct amdgpu_device *adev = dev->dev_private;
4555 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4556 
4557 	if (!amdgpu_device_supports_baco(adev->ddev))
4558 		return -ENOTSUPP;
4559 
4560 	if (ras && ras->supported)
4561 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4562 
4563 	return amdgpu_dpm_baco_enter(adev);
4564 }
4565 
4566 int amdgpu_device_baco_exit(struct drm_device *dev)
4567 {
4568 	struct amdgpu_device *adev = dev->dev_private;
4569 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4570 	int ret = 0;
4571 
4572 	if (!amdgpu_device_supports_baco(adev->ddev))
4573 		return -ENOTSUPP;
4574 
4575 	ret = amdgpu_dpm_baco_exit(adev);
4576 	if (ret)
4577 		return ret;
4578 
4579 	if (ras && ras->supported)
4580 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4581 
4582 	return 0;
4583 }
4584