1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
78 
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
82 
83 #include <drm/drm_drv.h>
84 
85 #if IS_ENABLED(CONFIG_X86)
86 #include <asm/intel-family.h>
87 #endif
88 
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96 
97 #define AMDGPU_RESUME_MS		2000
98 #define AMDGPU_MAX_RETRY_LIMIT		2
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103 
104 static const struct drm_driver amdgpu_kms_driver;
105 
/*
 * Human-readable ASIC names. NOTE(review): presumably indexed by
 * enum amd_asic_type — keep the order in sync with that enum; confirm
 * against amd_shared.h before reordering.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
146 
147 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148 
149 /**
150  * DOC: pcie_replay_count
151  *
152  * The amdgpu driver provides a sysfs API for reporting the total number
153  * of PCIe replays (NAKs)
154  * The file pcie_replay_count is used for this and returns the total
155  * number of replays as a sum of the NAKs generated and NAKs received
156  */
157 
158 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159 		struct device_attribute *attr, char *buf)
160 {
161 	struct drm_device *ddev = dev_get_drvdata(dev);
162 	struct amdgpu_device *adev = drm_to_adev(ddev);
163 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164 
165 	return sysfs_emit(buf, "%llu\n", cnt);
166 }
167 
168 static DEVICE_ATTR(pcie_replay_count, 0444,
169 		amdgpu_device_get_pcie_replay_count, NULL);
170 
171 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
172 					  struct bin_attribute *attr, char *buf,
173 					  loff_t ppos, size_t count)
174 {
175 	struct device *dev = kobj_to_dev(kobj);
176 	struct drm_device *ddev = dev_get_drvdata(dev);
177 	struct amdgpu_device *adev = drm_to_adev(ddev);
178 	ssize_t bytes_read;
179 
180 	switch (ppos) {
181 	case AMDGPU_SYS_REG_STATE_XGMI:
182 		bytes_read = amdgpu_asic_get_reg_state(
183 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
184 		break;
185 	case AMDGPU_SYS_REG_STATE_WAFL:
186 		bytes_read = amdgpu_asic_get_reg_state(
187 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
188 		break;
189 	case AMDGPU_SYS_REG_STATE_PCIE:
190 		bytes_read = amdgpu_asic_get_reg_state(
191 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
192 		break;
193 	case AMDGPU_SYS_REG_STATE_USR:
194 		bytes_read = amdgpu_asic_get_reg_state(
195 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
196 		break;
197 	case AMDGPU_SYS_REG_STATE_USR_1:
198 		bytes_read = amdgpu_asic_get_reg_state(
199 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
200 		break;
201 	default:
202 		return -EINVAL;
203 	}
204 
205 	return bytes_read;
206 }
207 
208 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
209 	 AMDGPU_SYS_REG_STATE_END);
210 
211 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
212 {
213 	int ret;
214 
215 	if (!amdgpu_asic_get_reg_state_supported(adev))
216 		return 0;
217 
218 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
219 
220 	return ret;
221 }
222 
223 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
224 {
225 	if (!amdgpu_asic_get_reg_state_supported(adev))
226 		return;
227 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
228 }
229 
230 /**
231  * DOC: board_info
232  *
233  * The amdgpu driver provides a sysfs API for giving board related information.
234  * It provides the form factor information in the format
235  *
236  *   type : form factor
237  *
238  * Possible form factor values
239  *
240  * - "cem"		- PCIE CEM card
241  * - "oam"		- Open Compute Accelerator Module
242  * - "unknown"	- Not known
243  *
244  */
245 
246 static ssize_t amdgpu_device_get_board_info(struct device *dev,
247 					    struct device_attribute *attr,
248 					    char *buf)
249 {
250 	struct drm_device *ddev = dev_get_drvdata(dev);
251 	struct amdgpu_device *adev = drm_to_adev(ddev);
252 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
253 	const char *pkg;
254 
255 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
256 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
257 
258 	switch (pkg_type) {
259 	case AMDGPU_PKG_TYPE_CEM:
260 		pkg = "cem";
261 		break;
262 	case AMDGPU_PKG_TYPE_OAM:
263 		pkg = "oam";
264 		break;
265 	default:
266 		pkg = "unknown";
267 		break;
268 	}
269 
270 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
271 }
272 
273 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
274 
/* Attributes published through amdgpu_board_attrs_group (see below). */
static struct attribute *amdgpu_board_attrs[] = {
	&dev_attr_board_info.attr,
	NULL,
};
279 
280 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
281 					     struct attribute *attr, int n)
282 {
283 	struct device *dev = kobj_to_dev(kobj);
284 	struct drm_device *ddev = dev_get_drvdata(dev);
285 	struct amdgpu_device *adev = drm_to_adev(ddev);
286 
287 	if (adev->flags & AMD_IS_APU)
288 		return 0;
289 
290 	return attr->mode;
291 }
292 
293 static const struct attribute_group amdgpu_board_attrs_group = {
294 	.attrs = amdgpu_board_attrs,
295 	.is_visible = amdgpu_board_attrs_is_visible
296 };
297 
298 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
299 
300 
301 /**
302  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
303  *
304  * @dev: drm_device pointer
305  *
306  * Returns true if the device is a dGPU with ATPX power control,
307  * otherwise return false.
308  */
309 bool amdgpu_device_supports_px(struct drm_device *dev)
310 {
311 	struct amdgpu_device *adev = drm_to_adev(dev);
312 
313 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
314 		return true;
315 	return false;
316 }
317 
318 /**
319  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
320  *
321  * @dev: drm_device pointer
322  *
323  * Returns true if the device is a dGPU with ACPI power control,
324  * otherwise return false.
325  */
326 bool amdgpu_device_supports_boco(struct drm_device *dev)
327 {
328 	struct amdgpu_device *adev = drm_to_adev(dev);
329 
330 	if (adev->has_pr3 ||
331 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
332 		return true;
333 	return false;
334 }
335 
336 /**
337  * amdgpu_device_supports_baco - Does the device support BACO
338  *
339  * @dev: drm_device pointer
340  *
 * Returns true if the device supports BACO,
342  * otherwise return false.
343  */
344 bool amdgpu_device_supports_baco(struct drm_device *dev)
345 {
346 	struct amdgpu_device *adev = drm_to_adev(dev);
347 
348 	return amdgpu_asic_supports_baco(adev);
349 }
350 
351 /**
352  * amdgpu_device_supports_smart_shift - Is the device dGPU with
353  * smart shift support
354  *
355  * @dev: drm_device pointer
356  *
357  * Returns true if the device is a dGPU with Smart Shift support,
358  * otherwise returns false.
359  */
360 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
361 {
362 	return (amdgpu_device_supports_boco(dev) &&
363 		amdgpu_acpi_is_power_shift_control_supported());
364 }
365 
366 /*
367  * VRAM access helper functions
368  */
369 
370 /**
371  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
372  *
373  * @adev: amdgpu_device pointer
374  * @pos: offset of the buffer in vram
375  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
377  * @write: true - write to vram, otherwise - read from vram
378  */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	/* Bail out if the device is going away (hot-unplug). */
	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	/* MM_INDEX/MM_DATA can only move whole dwords. */
	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	/* Serialize all users of the shared MM_INDEX/MM_DATA pair. */
	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		/* Address bits above bit 30 go through MM_INDEX_HI. */
		tmp = pos >> 31;

		/* NOTE(review): bit 31 appears to be the aperture-select bit
		 * in MM_INDEX — confirm against the register spec. */
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		/* Only rewrite MM_INDEX_HI when the high bits change. */
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}
411 
412 /**
 * amdgpu_device_aper_access - access vram by vram aperture
414  *
415  * @adev: amdgpu_device pointer
416  * @pos: offset of the buffer in vram
417  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
419  * @write: true - write to vram, otherwise - read from vram
420  *
421  * The return value means how many bytes have been transferred.
422  */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	/* No CPU mapping of the VRAM aperture; caller must fall back. */
	if (!adev->mman.aper_base_kaddr)
		return 0;

	/* Clamp the transfer to the CPU-visible portion of VRAM. */
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	/* Partial transfers are possible; caller handles the remainder. */
	return count;
#else
	/* Aperture access is only compiled in on 64-bit kernels. */
	return 0;
#endif
}
462 
463 /**
464  * amdgpu_device_vram_access - read/write a buffer in vram
465  *
466  * @adev: amdgpu_device pointer
467  * @pos: offset of the buffer in vram
468  * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
470  * @write: true - write to vram, otherwise - read from vram
471  */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* Try the CPU-visible VRAM aperture first (fast path). */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* Fall back to MM_INDEX/MM_DATA for the rest of the buffer. */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}
487 
488 /*
489  * register access helper functions.
490  */
491 
492 /* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	/* Set elsewhere (e.g. on unplug/error) to turn register access
	 * into a no-op. */
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}
519 
520 /**
521  * amdgpu_device_rreg - read a memory mapped IO or indirect register
522  *
523  * @adev: amdgpu_device pointer
524  * @reg: dword aligned register offset
525  * @acc_flags: access flags which require special behavior
526  *
527  * Returns the 32 bit value from the offset specified.
528  */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * Under SRIOV at runtime, route reads through the KIQ unless
		 * the caller asked for direct access (AMDGPU_REGS_NO_KIQ) or
		 * a GPU reset holds the write side of the reset semaphore
		 * (the trylock then fails and we fall back to plain MMIO).
		 */
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Offset beyond the MMIO BAR: use the indirect PCIE path. */
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}
554 
555 /*
556  * MMIO register read with bytes helper functions
557  * @offset:bytes offset from MMIO start
558  */
559 
560 /**
561  * amdgpu_mm_rreg8 - read a memory mapped IO register
562  *
563  * @adev: amdgpu_device pointer
564  * @offset: byte aligned register offset
565  *
566  * Returns the 8 bit value from the offset specified.
567  */
568 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
569 {
570 	if (amdgpu_device_skip_hw_access(adev))
571 		return 0;
572 
573 	if (offset < adev->rmmio_size)
574 		return (readb(adev->rmmio + offset));
575 	BUG();
576 }
577 
578 
579 /**
580  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
581  *
582  * @adev: amdgpu_device pointer
583  * @reg: dword aligned register offset
584  * @acc_flags: access flags which require special behavior
585  * @xcc_id: xcc accelerated compute core id
586  *
587  * Returns the 32 bit value from the offset specified.
588  */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id)
{
	uint32_t ret, rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * On a VF without runtime access, GC registers covered by the
		 * RLCG interface must be read through it.
		 */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, false,
							 &rlcg_flag)) {
			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			/* SRIOV runtime: route the read through the KIQ. */
			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Offset beyond the MMIO BAR: use the indirect PCIE path. */
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	return ret;
}
620 
621 /*
622  * MMIO register write with bytes helper functions
623  * @offset:bytes offset from MMIO start
624  * @value: the value want to be written to the register
625  */
626 
627 /**
628  * amdgpu_mm_wreg8 - read a memory mapped IO register
629  *
630  * @adev: amdgpu_device pointer
631  * @offset: byte aligned register offset
632  * @value: 8 bit value to write
633  *
634  * Writes the value specified to the offset specified.
635  */
636 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
637 {
638 	if (amdgpu_device_skip_hw_access(adev))
639 		return;
640 
641 	if (offset < adev->rmmio_size)
642 		writeb(value, adev->rmmio + offset);
643 	else
644 		BUG();
645 }
646 
647 /**
648  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
649  *
650  * @adev: amdgpu_device pointer
651  * @reg: dword aligned register offset
652  * @v: 32 bit value to write to the register
653  * @acc_flags: access flags which require special behavior
654  *
655  * Writes the value specified to the offset specified.
656  */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * Under SRIOV at runtime, route writes through the KIQ unless
		 * the caller asked for direct access (AMDGPU_REGS_NO_KIQ) or
		 * a GPU reset holds the write side of the reset semaphore.
		 */
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Offset beyond the MMIO BAR: use the indirect PCIE path. */
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}
679 
680 /**
681  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
682  *
683  * @adev: amdgpu_device pointer
684  * @reg: mmio/rlc register
685  * @v: value to write
686  * @xcc_id: xcc accelerated compute core id
687  *
688  * this function is invoked only for the debugfs register access
689  */
690 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
691 			     uint32_t reg, uint32_t v,
692 			     uint32_t xcc_id)
693 {
694 	if (amdgpu_device_skip_hw_access(adev))
695 		return;
696 
697 	if (amdgpu_sriov_fullaccess(adev) &&
698 	    adev->gfx.rlc.funcs &&
699 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
700 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
701 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
702 	} else if ((reg * 4) >= adev->rmmio_size) {
703 		adev->pcie_wreg(adev, reg * 4, v);
704 	} else {
705 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
706 	}
707 }
708 
709 /**
710  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
711  *
712  * @adev: amdgpu_device pointer
713  * @reg: dword aligned register offset
714  * @v: 32 bit value to write to the register
715  * @acc_flags: access flags which require special behavior
716  * @xcc_id: xcc accelerated compute core id
717  *
718  * Writes the value specified to the offset specified.
719  */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags, uint32_t xcc_id)
{
	uint32_t rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		/*
		 * On a VF without runtime access, GC registers covered by the
		 * RLCG interface must be written through it.
		 */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, true,
							 &rlcg_flag)) {
			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			/* SRIOV runtime: route the write through the KIQ. */
			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		/* Offset beyond the MMIO BAR: use the indirect PCIE path. */
		adev->pcie_wreg(adev, reg * 4, v);
	}
}
749 
750 /**
751  * amdgpu_device_indirect_rreg - read an indirect register
752  *
753  * @adev: amdgpu_device pointer
754  * @reg_addr: indirect register address to read from
755  *
756  * Returns the value of indirect register @reg_addr
757  */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to flush the posted index write before touching DATA. */
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
780 
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	/* nbio callbacks may not be installed yet; use fixed fallback
	 * offsets in that case. */
	if (unlikely(!adev->nbio.funcs)) {
		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
	} else {
		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	}

	/* INDEX_HI is only needed for addresses above 32 bits. */
	if (reg_addr >> 32) {
		if (unlikely(!adev->nbio.funcs))
			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
		else
			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	} else {
		pcie_index_hi = 0;
	}

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to flush each posted index write before the data read. */
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
832 
833 /**
834  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
835  *
836  * @adev: amdgpu_device pointer
837  * @reg_addr: indirect register address to read from
838  *
839  * Returns the value of indirect register @reg_addr
840  */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair;
	 * the lock also keeps the two 32-bit halves together. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
868 
u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				  u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* INDEX_HI is only needed for addresses above 32 bits. */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
			pcie_index_hi * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r |= ((u64)readl(pcie_data_offset) << 32);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
918 
919 /**
920  * amdgpu_device_indirect_wreg - write an indirect register address
921  *
922  * @adev: amdgpu_device pointer
923  * @reg_addr: indirect register offset
924  * @reg_data: indirect register data
925  *
926  */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to flush each posted write before the next access. */
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
947 
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* INDEX_HI is only needed for addresses above 32 bits. */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	/* Read back to flush each posted write before the next access. */
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
987 
988 /**
989  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
990  *
991  * @adev: amdgpu_device pointer
992  * @reg_addr: indirect register offset
993  * @reg_data: indirect register data
994  *
995  */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair;
	 * the lock also keeps the two 32-bit halves together. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
1022 
void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				   u64 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* INDEX_HI is only needed for addresses above 32 bits. */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	/* Serialize all users of the shared INDEX/DATA register pair. */
	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
1071 
1072 /**
1073  * amdgpu_device_get_rev_id - query device rev_id
1074  *
1075  * @adev: amdgpu_device pointer
1076  *
1077  * Return device rev_id
1078  */
1079 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1080 {
1081 	return adev->nbio.funcs->get_rev_id(adev);
1082 }
1083 
1084 /**
1085  * amdgpu_invalid_rreg - dummy reg read function
1086  *
1087  * @adev: amdgpu_device pointer
1088  * @reg: offset of register
1089  *
1090  * Dummy register read function.  Used for register blocks
1091  * that certain asics don't have (all asics).
1092  * Returns the value in the register.
1093  */
1094 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1095 {
1096 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1097 	BUG();
1098 	return 0;
1099 }
1100 
/**
 * amdgpu_invalid_rreg_ext - dummy reg read function for 64-bit offsets
 *
 * @adev: amdgpu_device pointer
 * @reg: 64-bit offset of register
 *
 * Dummy register read function for the extended (64-bit offset) register
 * path.  Used for register blocks that certain asics don't have.
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}
1107 
1108 /**
1109  * amdgpu_invalid_wreg - dummy reg write function
1110  *
1111  * @adev: amdgpu_device pointer
1112  * @reg: offset of register
1113  * @v: value to write to the register
1114  *
1115  * Dummy register read function.  Used for register blocks
1116  * that certain asics don't have (all asics).
1117  */
1118 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1119 {
1120 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1121 		  reg, v);
1122 	BUG();
1123 }
1124 
/**
 * amdgpu_invalid_wreg_ext - dummy reg write function for 64-bit offsets
 *
 * @adev: amdgpu_device pointer
 * @reg: 64-bit offset of register
 * @v: value to write to the register
 *
 * Dummy register write function for the extended (64-bit offset) register
 * path.  Used for register blocks that certain asics don't have.
 */
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}
1131 
1132 /**
1133  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1134  *
1135  * @adev: amdgpu_device pointer
1136  * @reg: offset of register
1137  *
1138  * Dummy register read function.  Used for register blocks
1139  * that certain asics don't have (all asics).
1140  * Returns the value in the register.
1141  */
1142 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1143 {
1144 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1145 	BUG();
1146 	return 0;
1147 }
1148 
1149 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1150 {
1151 	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1152 	BUG();
1153 	return 0;
1154 }
1155 
1156 /**
1157  * amdgpu_invalid_wreg64 - dummy reg write function
1158  *
1159  * @adev: amdgpu_device pointer
1160  * @reg: offset of register
1161  * @v: value to write to the register
1162  *
1163  * Dummy register read function.  Used for register blocks
1164  * that certain asics don't have (all asics).
1165  */
1166 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1167 {
1168 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1169 		  reg, v);
1170 	BUG();
1171 }
1172 
/**
 * amdgpu_invalid_wreg64_ext - dummy 64 bit reg write function for 64-bit offsets
 *
 * @adev: amdgpu_device pointer
 * @reg: 64-bit offset of register
 * @v: value to write to the register
 *
 * Dummy register write function for the extended (64-bit offset) register
 * path.  Used for register blocks that certain asics don't have.
 */
static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
		  reg, v);
	BUG();
}
1179 
1180 /**
1181  * amdgpu_block_invalid_rreg - dummy reg read function
1182  *
1183  * @adev: amdgpu_device pointer
1184  * @block: offset of instance
1185  * @reg: offset of register
1186  *
1187  * Dummy register read function.  Used for register blocks
1188  * that certain asics don't have (all asics).
1189  * Returns the value in the register.
1190  */
1191 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1192 					  uint32_t block, uint32_t reg)
1193 {
1194 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1195 		  reg, block);
1196 	BUG();
1197 	return 0;
1198 }
1199 
1200 /**
1201  * amdgpu_block_invalid_wreg - dummy reg write function
1202  *
1203  * @adev: amdgpu_device pointer
1204  * @block: offset of instance
1205  * @reg: offset of register
1206  * @v: value to write to the register
1207  *
1208  * Dummy register read function.  Used for register blocks
1209  * that certain asics don't have (all asics).
1210  */
1211 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1212 				      uint32_t block,
1213 				      uint32_t reg, uint32_t v)
1214 {
1215 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1216 		  reg, block, v);
1217 	BUG();
1218 }
1219 
1220 /**
1221  * amdgpu_device_asic_init - Wrapper for atom asic_init
1222  *
1223  * @adev: amdgpu_device pointer
1224  *
1225  * Does any asic specific work and then calls atom asic init.
1226  */
1227 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1228 {
1229 	int ret;
1230 
1231 	amdgpu_asic_pre_asic_init(adev);
1232 
1233 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1234 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1235 		amdgpu_psp_wait_for_bootloader(adev);
1236 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1237 		return ret;
1238 	} else {
1239 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1240 	}
1241 
1242 	return 0;
1243 }
1244 
1245 /**
1246  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1247  *
1248  * @adev: amdgpu_device pointer
1249  *
1250  * Allocates a scratch page of VRAM for use by various things in the
1251  * driver.
1252  */
1253 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1254 {
1255 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1256 				       AMDGPU_GEM_DOMAIN_VRAM |
1257 				       AMDGPU_GEM_DOMAIN_GTT,
1258 				       &adev->mem_scratch.robj,
1259 				       &adev->mem_scratch.gpu_addr,
1260 				       (void **)&adev->mem_scratch.ptr);
1261 }
1262 
1263 /**
1264  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1265  *
1266  * @adev: amdgpu_device pointer
1267  *
1268  * Frees the VRAM scratch page.
1269  */
1270 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1271 {
1272 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1273 }
1274 
1275 /**
1276  * amdgpu_device_program_register_sequence - program an array of registers.
1277  *
1278  * @adev: amdgpu_device pointer
1279  * @registers: pointer to the register array
1280  * @array_size: size of the register array
1281  *
1282  * Programs an array or registers with and or masks.
1283  * This is a helper for setting golden registers.
1284  */
1285 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1286 					     const u32 *registers,
1287 					     const u32 array_size)
1288 {
1289 	u32 tmp, reg, and_mask, or_mask;
1290 	int i;
1291 
1292 	if (array_size % 3)
1293 		return;
1294 
1295 	for (i = 0; i < array_size; i += 3) {
1296 		reg = registers[i + 0];
1297 		and_mask = registers[i + 1];
1298 		or_mask = registers[i + 2];
1299 
1300 		if (and_mask == 0xffffffff) {
1301 			tmp = or_mask;
1302 		} else {
1303 			tmp = RREG32(reg);
1304 			tmp &= ~and_mask;
1305 			if (adev->family >= AMDGPU_FAMILY_AI)
1306 				tmp |= (or_mask & and_mask);
1307 			else
1308 				tmp |= or_mask;
1309 		}
1310 		WREG32(reg, tmp);
1311 	}
1312 }
1313 
1314 /**
1315  * amdgpu_device_pci_config_reset - reset the GPU
1316  *
1317  * @adev: amdgpu_device pointer
1318  *
1319  * Resets the GPU using the pci config reset sequence.
1320  * Only applicable to asics prior to vega10.
1321  */
1322 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1323 {
1324 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1325 }
1326 
1327 /**
1328  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1329  *
1330  * @adev: amdgpu_device pointer
1331  *
1332  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1333  */
1334 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1335 {
1336 	return pci_reset_function(adev->pdev);
1337 }
1338 
1339 /*
1340  * amdgpu_device_wb_*()
1341  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1343  */
1344 
1345 /**
1346  * amdgpu_device_wb_fini - Disable Writeback and free memory
1347  *
1348  * @adev: amdgpu_device pointer
1349  *
1350  * Disables Writeback and frees the Writeback memory (all asics).
1351  * Used at driver shutdown.
1352  */
1353 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1354 {
1355 	if (adev->wb.wb_obj) {
1356 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1357 				      &adev->wb.gpu_addr,
1358 				      (void **)&adev->wb.wb);
1359 		adev->wb.wb_obj = NULL;
1360 	}
1361 }
1362 
1363 /**
1364  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1365  *
1366  * @adev: amdgpu_device pointer
1367  *
1368  * Initializes writeback and allocates writeback memory (all asics).
1369  * Used at driver startup.
1370  * Returns 0 on success or an -error on failure.
1371  */
1372 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1373 {
1374 	int r;
1375 
1376 	if (adev->wb.wb_obj == NULL) {
1377 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1378 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1379 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1380 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1381 					    (void **)&adev->wb.wb);
1382 		if (r) {
1383 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1384 			return r;
1385 		}
1386 
1387 		adev->wb.num_wb = AMDGPU_MAX_WB;
1388 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1389 
1390 		/* clear wb memory */
1391 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1392 	}
1393 
1394 	return 0;
1395 }
1396 
1397 /**
1398  * amdgpu_device_wb_get - Allocate a wb entry
1399  *
1400  * @adev: amdgpu_device pointer
1401  * @wb: wb index
1402  *
1403  * Allocate a wb slot for use by the driver (all asics).
1404  * Returns 0 on success or -EINVAL on failure.
1405  */
1406 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1407 {
1408 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1409 
1410 	if (offset < adev->wb.num_wb) {
1411 		__set_bit(offset, adev->wb.used);
1412 		*wb = offset << 3; /* convert to dw offset */
1413 		return 0;
1414 	} else {
1415 		return -EINVAL;
1416 	}
1417 }
1418 
1419 /**
1420  * amdgpu_device_wb_free - Free a wb entry
1421  *
1422  * @adev: amdgpu_device pointer
1423  * @wb: wb index
1424  *
1425  * Free a wb slot allocated for use by the driver (all asics)
1426  */
1427 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1428 {
1429 	wb >>= 3;
1430 	if (wb < adev->wb.num_wb)
1431 		__clear_bit(wb, adev->wb.used);
1432 }
1433 
1434 /**
1435  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1436  *
1437  * @adev: amdgpu_device pointer
1438  *
1439  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1440  * to fail, but if any of the BARs is not accessible after the size we abort
1441  * driver loading by returning -ENODEV.
1442  */
1443 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1444 {
1445 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1446 	struct pci_bus *root;
1447 	struct resource *res;
1448 	unsigned int i;
1449 	u16 cmd;
1450 	int r;
1451 
1452 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1453 		return 0;
1454 
1455 	/* Bypass for VF */
1456 	if (amdgpu_sriov_vf(adev))
1457 		return 0;
1458 
1459 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1460 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1461 		DRM_WARN("System can't access extended configuration space,please check!!\n");
1462 
1463 	/* skip if the bios has already enabled large BAR */
1464 	if (adev->gmc.real_vram_size &&
1465 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1466 		return 0;
1467 
1468 	/* Check if the root BUS has 64bit memory resources */
1469 	root = adev->pdev->bus;
1470 	while (root->parent)
1471 		root = root->parent;
1472 
1473 	pci_bus_for_each_resource(root, res, i) {
1474 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1475 		    res->start > 0x100000000ull)
1476 			break;
1477 	}
1478 
1479 	/* Trying to resize is pointless without a root hub window above 4GB */
1480 	if (!res)
1481 		return 0;
1482 
1483 	/* Limit the BAR size to what is available */
1484 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1485 			rbar_size);
1486 
1487 	/* Disable memory decoding while we change the BAR addresses and size */
1488 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1489 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1490 			      cmd & ~PCI_COMMAND_MEMORY);
1491 
1492 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1493 	amdgpu_doorbell_fini(adev);
1494 	if (adev->asic_type >= CHIP_BONAIRE)
1495 		pci_release_resource(adev->pdev, 2);
1496 
1497 	pci_release_resource(adev->pdev, 0);
1498 
1499 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1500 	if (r == -ENOSPC)
1501 		DRM_INFO("Not enough PCI address space for a large BAR.");
1502 	else if (r && r != -ENOTSUPP)
1503 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1504 
1505 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1506 
1507 	/* When the doorbell or fb BAR isn't available we have no chance of
1508 	 * using the device.
1509 	 */
1510 	r = amdgpu_doorbell_init(adev);
1511 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1512 		return -ENODEV;
1513 
1514 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1515 
1516 	return 0;
1517 }
1518 
1519 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1520 {
1521 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1522 		return false;
1523 
1524 	return true;
1525 }
1526 
1527 /*
1528  * GPU helpers function.
1529  */
1530 /**
1531  * amdgpu_device_need_post - check if the hw need post or not
1532  *
1533  * @adev: amdgpu_device pointer
1534  *
1535  * Check if the asic has been initialized (all asics) at driver startup
1536  * or post is needed if  hw reset is performed.
1537  * Returns true if need or false if not.
1538  */
1539 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1540 {
1541 	uint32_t reg;
1542 
1543 	if (amdgpu_sriov_vf(adev))
1544 		return false;
1545 
1546 	if (!amdgpu_device_read_bios(adev))
1547 		return false;
1548 
1549 	if (amdgpu_passthrough(adev)) {
1550 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1551 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1552 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1553 		 * vpost executed for smc version below 22.15
1554 		 */
1555 		if (adev->asic_type == CHIP_FIJI) {
1556 			int err;
1557 			uint32_t fw_ver;
1558 
1559 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1560 			/* force vPost if error occured */
1561 			if (err)
1562 				return true;
1563 
1564 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1565 			release_firmware(adev->pm.fw);
1566 			if (fw_ver < 0x00160e00)
1567 				return true;
1568 		}
1569 	}
1570 
1571 	/* Don't post if we need to reset whole hive on init */
1572 	if (adev->gmc.xgmi.pending_reset)
1573 		return false;
1574 
1575 	if (adev->has_hw_reset) {
1576 		adev->has_hw_reset = false;
1577 		return true;
1578 	}
1579 
1580 	/* bios scratch used on CIK+ */
1581 	if (adev->asic_type >= CHIP_BONAIRE)
1582 		return amdgpu_atombios_scratch_need_asic_init(adev);
1583 
1584 	/* check MEM_SIZE for older asics */
1585 	reg = amdgpu_asic_get_config_memsize(adev);
1586 
1587 	if ((reg != 0) && (reg != 0xffffffff))
1588 		return false;
1589 
1590 	return true;
1591 }
1592 
1593 /*
1594  * Check whether seamless boot is supported.
1595  *
1596  * So far we only support seamless boot on DCE 3.0 or later.
1597  * If users report that it works on older ASICS as well, we may
1598  * loosen this.
1599  */
1600 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1601 {
1602 	switch (amdgpu_seamless) {
1603 	case -1:
1604 		break;
1605 	case 1:
1606 		return true;
1607 	case 0:
1608 		return false;
1609 	default:
1610 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1611 			  amdgpu_seamless);
1612 		return false;
1613 	}
1614 
1615 	if (!(adev->flags & AMD_IS_APU))
1616 		return false;
1617 
1618 	if (adev->mman.keep_stolen_vga_memory)
1619 		return false;
1620 
1621 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1622 }
1623 
1624 /*
1625  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1626  * don't support dynamic speed switching. Until we have confirmation from Intel
1627  * that a specific host supports it, it's safer that we keep it disabled for all.
1628  *
1629  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1630  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1631  */
1632 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1633 {
1634 #if IS_ENABLED(CONFIG_X86)
1635 	struct cpuinfo_x86 *c = &cpu_data(0);
1636 
1637 	/* eGPU change speeds based on USB4 fabric conditions */
1638 	if (dev_is_removable(adev->dev))
1639 		return true;
1640 
1641 	if (c->x86_vendor == X86_VENDOR_INTEL)
1642 		return false;
1643 #endif
1644 	return true;
1645 }
1646 
1647 /**
1648  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1649  *
1650  * @adev: amdgpu_device pointer
1651  *
1652  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1653  * be set for this device.
1654  *
1655  * Returns true if it should be used or false if not.
1656  */
1657 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1658 {
1659 	switch (amdgpu_aspm) {
1660 	case -1:
1661 		break;
1662 	case 0:
1663 		return false;
1664 	case 1:
1665 		return true;
1666 	default:
1667 		return false;
1668 	}
1669 	if (adev->flags & AMD_IS_APU)
1670 		return false;
1671 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1672 		return false;
1673 	return pcie_aspm_enabled(adev->pdev);
1674 }
1675 
1676 /* if we get transitioned to only one device, take VGA back */
1677 /**
1678  * amdgpu_device_vga_set_decode - enable/disable vga decode
1679  *
1680  * @pdev: PCI device pointer
1681  * @state: enable/disable vga decode
1682  *
1683  * Enable/disable vga decode (all asics).
1684  * Returns VGA resource flags.
1685  */
1686 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1687 		bool state)
1688 {
1689 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1690 
1691 	amdgpu_asic_set_vga_state(adev, state);
1692 	if (state)
1693 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1694 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1695 	else
1696 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1697 }
1698 
1699 /**
1700  * amdgpu_device_check_block_size - validate the vm block size
1701  *
1702  * @adev: amdgpu_device pointer
1703  *
1704  * Validates the vm block size specified via module parameter.
1705  * The vm block size defines number of bits in page table versus page directory,
1706  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1707  * page table and the remaining bits are in the page directory.
1708  */
1709 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1710 {
1711 	/* defines number of bits in page table versus page directory,
1712 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1713 	 * page table and the remaining bits are in the page directory
1714 	 */
1715 	if (amdgpu_vm_block_size == -1)
1716 		return;
1717 
1718 	if (amdgpu_vm_block_size < 9) {
1719 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1720 			 amdgpu_vm_block_size);
1721 		amdgpu_vm_block_size = -1;
1722 	}
1723 }
1724 
1725 /**
1726  * amdgpu_device_check_vm_size - validate the vm size
1727  *
1728  * @adev: amdgpu_device pointer
1729  *
1730  * Validates the vm size in GB specified via module parameter.
1731  * The VM size is the size of the GPU virtual memory space in GB.
1732  */
1733 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1734 {
1735 	/* no need to check the default value */
1736 	if (amdgpu_vm_size == -1)
1737 		return;
1738 
1739 	if (amdgpu_vm_size < 1) {
1740 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1741 			 amdgpu_vm_size);
1742 		amdgpu_vm_size = -1;
1743 	}
1744 }
1745 
1746 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1747 {
1748 	struct sysinfo si;
1749 	bool is_os_64 = (sizeof(void *) == 8);
1750 	uint64_t total_memory;
1751 	uint64_t dram_size_seven_GB = 0x1B8000000;
1752 	uint64_t dram_size_three_GB = 0xB8000000;
1753 
1754 	if (amdgpu_smu_memory_pool_size == 0)
1755 		return;
1756 
1757 	if (!is_os_64) {
1758 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1759 		goto def_value;
1760 	}
1761 	si_meminfo(&si);
1762 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1763 
1764 	if ((amdgpu_smu_memory_pool_size == 1) ||
1765 		(amdgpu_smu_memory_pool_size == 2)) {
1766 		if (total_memory < dram_size_three_GB)
1767 			goto def_value1;
1768 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1769 		(amdgpu_smu_memory_pool_size == 8)) {
1770 		if (total_memory < dram_size_seven_GB)
1771 			goto def_value1;
1772 	} else {
1773 		DRM_WARN("Smu memory pool size not supported\n");
1774 		goto def_value;
1775 	}
1776 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1777 
1778 	return;
1779 
1780 def_value1:
1781 	DRM_WARN("No enough system memory\n");
1782 def_value:
1783 	adev->pm.smu_prv_buffer_size = 0;
1784 }
1785 
/**
 * amdgpu_device_init_apu_flags - set APU identification flags
 *
 * @adev: amdgpu_device pointer
 *
 * Derives adev->apu_flags from the ASIC type and PCI device ID so later
 * code can distinguish APU variants (e.g. Raven vs Picasso, Renoir vs
 * Green Sardine).  Only applies to APUs of CHIP_RAVEN or newer.
 * Returns 0.
 */
static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		/* 0x15dd: Raven, 0x15d8: Picasso */
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}
1822 
1823 /**
1824  * amdgpu_device_check_arguments - validate module params
1825  *
1826  * @adev: amdgpu_device pointer
1827  *
1828  * Validates certain module parameters and updates
1829  * the associated values used by the driver (all asics).
1830  */
1831 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1832 {
1833 	if (amdgpu_sched_jobs < 4) {
1834 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1835 			 amdgpu_sched_jobs);
1836 		amdgpu_sched_jobs = 4;
1837 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1838 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1839 			 amdgpu_sched_jobs);
1840 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1841 	}
1842 
1843 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1844 		/* gart size must be greater or equal to 32M */
1845 		dev_warn(adev->dev, "gart size (%d) too small\n",
1846 			 amdgpu_gart_size);
1847 		amdgpu_gart_size = -1;
1848 	}
1849 
1850 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1851 		/* gtt size must be greater or equal to 32M */
1852 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1853 				 amdgpu_gtt_size);
1854 		amdgpu_gtt_size = -1;
1855 	}
1856 
1857 	/* valid range is between 4 and 9 inclusive */
1858 	if (amdgpu_vm_fragment_size != -1 &&
1859 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1860 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1861 		amdgpu_vm_fragment_size = -1;
1862 	}
1863 
1864 	if (amdgpu_sched_hw_submission < 2) {
1865 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1866 			 amdgpu_sched_hw_submission);
1867 		amdgpu_sched_hw_submission = 2;
1868 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1869 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1870 			 amdgpu_sched_hw_submission);
1871 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1872 	}
1873 
1874 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1875 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1876 		amdgpu_reset_method = -1;
1877 	}
1878 
1879 	amdgpu_device_check_smu_prv_buffer_size(adev);
1880 
1881 	amdgpu_device_check_vm_size(adev);
1882 
1883 	amdgpu_device_check_block_size(adev);
1884 
1885 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1886 
1887 	return 0;
1888 }
1889 
1890 /**
1891  * amdgpu_switcheroo_set_state - set switcheroo state
1892  *
1893  * @pdev: pci dev pointer
1894  * @state: vga_switcheroo state
1895  *
1896  * Callback for the switcheroo driver.  Suspends or resumes
1897  * the asics before or after it is powered up using ACPI methods.
1898  */
1899 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1900 					enum vga_switcheroo_state state)
1901 {
1902 	struct drm_device *dev = pci_get_drvdata(pdev);
1903 	int r;
1904 
1905 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1906 		return;
1907 
1908 	if (state == VGA_SWITCHEROO_ON) {
1909 		pr_info("switched on\n");
1910 		/* don't suspend or resume card normally */
1911 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1912 
1913 		pci_set_power_state(pdev, PCI_D0);
1914 		amdgpu_device_load_pci_state(pdev);
1915 		r = pci_enable_device(pdev);
1916 		if (r)
1917 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1918 		amdgpu_device_resume(dev, true);
1919 
1920 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1921 	} else {
1922 		pr_info("switched off\n");
1923 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1924 		amdgpu_device_prepare(dev);
1925 		amdgpu_device_suspend(dev, true);
1926 		amdgpu_device_cache_pci_state(pdev);
1927 		/* Shut down the device */
1928 		pci_disable_device(pdev);
1929 		pci_set_power_state(pdev, PCI_D3cold);
1930 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1931 	}
1932 }
1933 
1934 /**
1935  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1936  *
1937  * @pdev: pci dev pointer
1938  *
1939  * Callback for the switcheroo driver.  Check of the switcheroo
1940  * state can be changed.
1941  * Returns true if the state can be changed, false if not.
1942  */
1943 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1944 {
1945 	struct drm_device *dev = pci_get_drvdata(pdev);
1946 
1947        /*
1948 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1949 	* locking inversion with the driver load path. And the access here is
1950 	* completely racy anyway. So don't bother with locking for now.
1951 	*/
1952 	return atomic_read(&dev->open_count) == 0;
1953 }
1954 
/* vga_switcheroo client callbacks; no reprobe hook is needed */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};
1960 
1961 /**
1962  * amdgpu_device_ip_set_clockgating_state - set the CG state
1963  *
1964  * @dev: amdgpu_device pointer
1965  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1966  * @state: clockgating state (gate or ungate)
1967  *
1968  * Sets the requested clockgating state for all instances of
1969  * the hardware IP specified.
1970  * Returns the error code from the last instance.
1971  */
1972 int amdgpu_device_ip_set_clockgating_state(void *dev,
1973 					   enum amd_ip_block_type block_type,
1974 					   enum amd_clockgating_state state)
1975 {
1976 	struct amdgpu_device *adev = dev;
1977 	int i, r = 0;
1978 
1979 	for (i = 0; i < adev->num_ip_blocks; i++) {
1980 		if (!adev->ip_blocks[i].status.valid)
1981 			continue;
1982 		if (adev->ip_blocks[i].version->type != block_type)
1983 			continue;
1984 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1985 			continue;
1986 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1987 			(void *)adev, state);
1988 		if (r)
1989 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1990 				  adev->ip_blocks[i].version->funcs->name, r);
1991 	}
1992 	return r;
1993 }
1994 
1995 /**
1996  * amdgpu_device_ip_set_powergating_state - set the PG state
1997  *
1998  * @dev: amdgpu_device pointer
1999  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2000  * @state: powergating state (gate or ungate)
2001  *
2002  * Sets the requested powergating state for all instances of
2003  * the hardware IP specified.
2004  * Returns the error code from the last instance.
2005  */
2006 int amdgpu_device_ip_set_powergating_state(void *dev,
2007 					   enum amd_ip_block_type block_type,
2008 					   enum amd_powergating_state state)
2009 {
2010 	struct amdgpu_device *adev = dev;
2011 	int i, r = 0;
2012 
2013 	for (i = 0; i < adev->num_ip_blocks; i++) {
2014 		if (!adev->ip_blocks[i].status.valid)
2015 			continue;
2016 		if (adev->ip_blocks[i].version->type != block_type)
2017 			continue;
2018 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2019 			continue;
2020 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2021 			(void *)adev, state);
2022 		if (r)
2023 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2024 				  adev->ip_blocks[i].version->funcs->name, r);
2025 	}
2026 	return r;
2027 }
2028 
2029 /**
2030  * amdgpu_device_ip_get_clockgating_state - get the CG state
2031  *
2032  * @adev: amdgpu_device pointer
2033  * @flags: clockgating feature flags
2034  *
2035  * Walks the list of IPs on the device and updates the clockgating
2036  * flags for each IP.
2037  * Updates @flags with the feature flags for each hardware IP where
2038  * clockgating is enabled.
2039  */
2040 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2041 					    u64 *flags)
2042 {
2043 	int i;
2044 
2045 	for (i = 0; i < adev->num_ip_blocks; i++) {
2046 		if (!adev->ip_blocks[i].status.valid)
2047 			continue;
2048 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2049 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2050 	}
2051 }
2052 
2053 /**
2054  * amdgpu_device_ip_wait_for_idle - wait for idle
2055  *
2056  * @adev: amdgpu_device pointer
2057  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2058  *
2059  * Waits for the request hardware IP to be idle.
2060  * Returns 0 for success or a negative error code on failure.
2061  */
2062 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2063 				   enum amd_ip_block_type block_type)
2064 {
2065 	int i, r;
2066 
2067 	for (i = 0; i < adev->num_ip_blocks; i++) {
2068 		if (!adev->ip_blocks[i].status.valid)
2069 			continue;
2070 		if (adev->ip_blocks[i].version->type == block_type) {
2071 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2072 			if (r)
2073 				return r;
2074 			break;
2075 		}
2076 	}
2077 	return 0;
2078 
2079 }
2080 
2081 /**
2082  * amdgpu_device_ip_is_idle - is the hardware IP idle
2083  *
2084  * @adev: amdgpu_device pointer
2085  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2086  *
2087  * Check if the hardware IP is idle or not.
2088  * Returns true if it the IP is idle, false if not.
2089  */
2090 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2091 			      enum amd_ip_block_type block_type)
2092 {
2093 	int i;
2094 
2095 	for (i = 0; i < adev->num_ip_blocks; i++) {
2096 		if (!adev->ip_blocks[i].status.valid)
2097 			continue;
2098 		if (adev->ip_blocks[i].version->type == block_type)
2099 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2100 	}
2101 	return true;
2102 
2103 }
2104 
2105 /**
2106  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2107  *
2108  * @adev: amdgpu_device pointer
2109  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2110  *
2111  * Returns a pointer to the hardware IP block structure
2112  * if it exists for the asic, otherwise NULL.
2113  */
2114 struct amdgpu_ip_block *
2115 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2116 			      enum amd_ip_block_type type)
2117 {
2118 	int i;
2119 
2120 	for (i = 0; i < adev->num_ip_blocks; i++)
2121 		if (adev->ip_blocks[i].version->type == type)
2122 			return &adev->ip_blocks[i];
2123 
2124 	return NULL;
2125 }
2126 
2127 /**
2128  * amdgpu_device_ip_block_version_cmp
2129  *
2130  * @adev: amdgpu_device pointer
2131  * @type: enum amd_ip_block_type
2132  * @major: major version
2133  * @minor: minor version
2134  *
2135  * return 0 if equal or greater
2136  * return 1 if smaller or the ip_block doesn't exist
2137  */
2138 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2139 				       enum amd_ip_block_type type,
2140 				       u32 major, u32 minor)
2141 {
2142 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2143 
2144 	if (ip_block && ((ip_block->version->major > major) ||
2145 			((ip_block->version->major == major) &&
2146 			(ip_block->version->minor >= minor))))
2147 		return 0;
2148 
2149 	return 1;
2150 }
2151 
2152 /**
2153  * amdgpu_device_ip_block_add
2154  *
2155  * @adev: amdgpu_device pointer
2156  * @ip_block_version: pointer to the IP to add
2157  *
2158  * Adds the IP block driver information to the collection of IPs
2159  * on the asic.
2160  */
2161 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2162 			       const struct amdgpu_ip_block_version *ip_block_version)
2163 {
2164 	if (!ip_block_version)
2165 		return -EINVAL;
2166 
2167 	switch (ip_block_version->type) {
2168 	case AMD_IP_BLOCK_TYPE_VCN:
2169 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2170 			return 0;
2171 		break;
2172 	case AMD_IP_BLOCK_TYPE_JPEG:
2173 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2174 			return 0;
2175 		break;
2176 	default:
2177 		break;
2178 	}
2179 
2180 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2181 		  ip_block_version->funcs->name);
2182 
2183 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2184 
2185 	return 0;
2186 }
2187 
2188 /**
2189  * amdgpu_device_enable_virtual_display - enable virtual display feature
2190  *
2191  * @adev: amdgpu_device pointer
2192  *
2193  * Enabled the virtual display feature if the user has enabled it via
2194  * the module parameter virtual_display.  This feature provides a virtual
2195  * display hardware on headless boards or in virtualized environments.
2196  * This function parses and validates the configuration string specified by
2197  * the user and configues the virtual display configuration (number of
2198  * virtual connectors, crtcs, etc.) specified.
2199  */
2200 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2201 {
2202 	adev->enable_virtual_display = false;
2203 
2204 	if (amdgpu_virtual_display) {
2205 		const char *pci_address_name = pci_name(adev->pdev);
2206 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2207 
2208 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2209 		pciaddstr_tmp = pciaddstr;
2210 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2211 			pciaddname = strsep(&pciaddname_tmp, ",");
2212 			if (!strcmp("all", pciaddname)
2213 			    || !strcmp(pci_address_name, pciaddname)) {
2214 				long num_crtc;
2215 				int res = -1;
2216 
2217 				adev->enable_virtual_display = true;
2218 
2219 				if (pciaddname_tmp)
2220 					res = kstrtol(pciaddname_tmp, 10,
2221 						      &num_crtc);
2222 
2223 				if (!res) {
2224 					if (num_crtc < 1)
2225 						num_crtc = 1;
2226 					if (num_crtc > 6)
2227 						num_crtc = 6;
2228 					adev->mode_info.num_crtc = num_crtc;
2229 				} else {
2230 					adev->mode_info.num_crtc = 1;
2231 				}
2232 				break;
2233 			}
2234 		}
2235 
2236 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2237 			 amdgpu_virtual_display, pci_address_name,
2238 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2239 
2240 		kfree(pciaddstr);
2241 	}
2242 }
2243 
2244 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2245 {
2246 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2247 		adev->mode_info.num_crtc = 1;
2248 		adev->enable_virtual_display = true;
2249 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2250 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2251 	}
2252 }
2253 
2254 /**
2255  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2256  *
2257  * @adev: amdgpu_device pointer
2258  *
2259  * Parses the asic configuration parameters specified in the gpu info
2260  * firmware and makes them availale to the driver for use in configuring
2261  * the asic.
2262  * Returns 0 on success, -EINVAL on failure.
2263  */
2264 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2265 {
2266 	const char *chip_name;
2267 	char fw_name[40];
2268 	int err;
2269 	const struct gpu_info_firmware_header_v1_0 *hdr;
2270 
2271 	adev->firmware.gpu_info_fw = NULL;
2272 
2273 	if (adev->mman.discovery_bin)
2274 		return 0;
2275 
2276 	switch (adev->asic_type) {
2277 	default:
2278 		return 0;
2279 	case CHIP_VEGA10:
2280 		chip_name = "vega10";
2281 		break;
2282 	case CHIP_VEGA12:
2283 		chip_name = "vega12";
2284 		break;
2285 	case CHIP_RAVEN:
2286 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2287 			chip_name = "raven2";
2288 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2289 			chip_name = "picasso";
2290 		else
2291 			chip_name = "raven";
2292 		break;
2293 	case CHIP_ARCTURUS:
2294 		chip_name = "arcturus";
2295 		break;
2296 	case CHIP_NAVI12:
2297 		chip_name = "navi12";
2298 		break;
2299 	}
2300 
2301 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2302 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2303 	if (err) {
2304 		dev_err(adev->dev,
2305 			"Failed to get gpu_info firmware \"%s\"\n",
2306 			fw_name);
2307 		goto out;
2308 	}
2309 
2310 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2311 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2312 
2313 	switch (hdr->version_major) {
2314 	case 1:
2315 	{
2316 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2317 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2318 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2319 
2320 		/*
2321 		 * Should be droped when DAL no longer needs it.
2322 		 */
2323 		if (adev->asic_type == CHIP_NAVI12)
2324 			goto parse_soc_bounding_box;
2325 
2326 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2327 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2328 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2329 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2330 		adev->gfx.config.max_texture_channel_caches =
2331 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2332 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2333 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2334 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2335 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2336 		adev->gfx.config.double_offchip_lds_buf =
2337 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2338 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2339 		adev->gfx.cu_info.max_waves_per_simd =
2340 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2341 		adev->gfx.cu_info.max_scratch_slots_per_cu =
2342 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2343 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2344 		if (hdr->version_minor >= 1) {
2345 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2346 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2347 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2348 			adev->gfx.config.num_sc_per_sh =
2349 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2350 			adev->gfx.config.num_packer_per_sc =
2351 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2352 		}
2353 
2354 parse_soc_bounding_box:
2355 		/*
2356 		 * soc bounding box info is not integrated in disocovery table,
2357 		 * we always need to parse it from gpu info firmware if needed.
2358 		 */
2359 		if (hdr->version_minor == 2) {
2360 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2361 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2362 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2363 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2364 		}
2365 		break;
2366 	}
2367 	default:
2368 		dev_err(adev->dev,
2369 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2370 		err = -EINVAL;
2371 		goto out;
2372 	}
2373 out:
2374 	return err;
2375 }
2376 
2377 /**
2378  * amdgpu_device_ip_early_init - run early init for hardware IPs
2379  *
2380  * @adev: amdgpu_device pointer
2381  *
2382  * Early initialization pass for hardware IPs.  The hardware IPs that make
2383  * up each asic are discovered each IP's early_init callback is run.  This
2384  * is the first stage in initializing the asic.
2385  * Returns 0 on success, negative error code on failure.
2386  */
2387 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2388 {
2389 	struct pci_dev *parent;
2390 	int i, r;
2391 	bool total;
2392 
2393 	amdgpu_device_enable_virtual_display(adev);
2394 
2395 	if (amdgpu_sriov_vf(adev)) {
2396 		r = amdgpu_virt_request_full_gpu(adev, true);
2397 		if (r)
2398 			return r;
2399 	}
2400 
2401 	switch (adev->asic_type) {
2402 #ifdef CONFIG_DRM_AMDGPU_SI
2403 	case CHIP_VERDE:
2404 	case CHIP_TAHITI:
2405 	case CHIP_PITCAIRN:
2406 	case CHIP_OLAND:
2407 	case CHIP_HAINAN:
2408 		adev->family = AMDGPU_FAMILY_SI;
2409 		r = si_set_ip_blocks(adev);
2410 		if (r)
2411 			return r;
2412 		break;
2413 #endif
2414 #ifdef CONFIG_DRM_AMDGPU_CIK
2415 	case CHIP_BONAIRE:
2416 	case CHIP_HAWAII:
2417 	case CHIP_KAVERI:
2418 	case CHIP_KABINI:
2419 	case CHIP_MULLINS:
2420 		if (adev->flags & AMD_IS_APU)
2421 			adev->family = AMDGPU_FAMILY_KV;
2422 		else
2423 			adev->family = AMDGPU_FAMILY_CI;
2424 
2425 		r = cik_set_ip_blocks(adev);
2426 		if (r)
2427 			return r;
2428 		break;
2429 #endif
2430 	case CHIP_TOPAZ:
2431 	case CHIP_TONGA:
2432 	case CHIP_FIJI:
2433 	case CHIP_POLARIS10:
2434 	case CHIP_POLARIS11:
2435 	case CHIP_POLARIS12:
2436 	case CHIP_VEGAM:
2437 	case CHIP_CARRIZO:
2438 	case CHIP_STONEY:
2439 		if (adev->flags & AMD_IS_APU)
2440 			adev->family = AMDGPU_FAMILY_CZ;
2441 		else
2442 			adev->family = AMDGPU_FAMILY_VI;
2443 
2444 		r = vi_set_ip_blocks(adev);
2445 		if (r)
2446 			return r;
2447 		break;
2448 	default:
2449 		r = amdgpu_discovery_set_ip_blocks(adev);
2450 		if (r)
2451 			return r;
2452 		break;
2453 	}
2454 
2455 	if (amdgpu_has_atpx() &&
2456 	    (amdgpu_is_atpx_hybrid() ||
2457 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2458 	    ((adev->flags & AMD_IS_APU) == 0) &&
2459 	    !dev_is_removable(&adev->pdev->dev))
2460 		adev->flags |= AMD_IS_PX;
2461 
2462 	if (!(adev->flags & AMD_IS_APU)) {
2463 		parent = pcie_find_root_port(adev->pdev);
2464 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2465 	}
2466 
2467 
2468 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2469 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2470 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2471 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2472 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2473 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2474 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2475 
2476 	total = true;
2477 	for (i = 0; i < adev->num_ip_blocks; i++) {
2478 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2479 			DRM_WARN("disabled ip block: %d <%s>\n",
2480 				  i, adev->ip_blocks[i].version->funcs->name);
2481 			adev->ip_blocks[i].status.valid = false;
2482 		} else {
2483 			if (adev->ip_blocks[i].version->funcs->early_init) {
2484 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2485 				if (r == -ENOENT) {
2486 					adev->ip_blocks[i].status.valid = false;
2487 				} else if (r) {
2488 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2489 						  adev->ip_blocks[i].version->funcs->name, r);
2490 					total = false;
2491 				} else {
2492 					adev->ip_blocks[i].status.valid = true;
2493 				}
2494 			} else {
2495 				adev->ip_blocks[i].status.valid = true;
2496 			}
2497 		}
2498 		/* get the vbios after the asic_funcs are set up */
2499 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2500 			r = amdgpu_device_parse_gpu_info_fw(adev);
2501 			if (r)
2502 				return r;
2503 
2504 			/* Read BIOS */
2505 			if (amdgpu_device_read_bios(adev)) {
2506 				if (!amdgpu_get_bios(adev))
2507 					return -EINVAL;
2508 
2509 				r = amdgpu_atombios_init(adev);
2510 				if (r) {
2511 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2512 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2513 					return r;
2514 				}
2515 			}
2516 
2517 			/*get pf2vf msg info at it's earliest time*/
2518 			if (amdgpu_sriov_vf(adev))
2519 				amdgpu_virt_init_data_exchange(adev);
2520 
2521 		}
2522 	}
2523 	if (!total)
2524 		return -ENODEV;
2525 
2526 	amdgpu_amdkfd_device_probe(adev);
2527 	adev->cg_flags &= amdgpu_cg_mask;
2528 	adev->pg_flags &= amdgpu_pg_mask;
2529 
2530 	return 0;
2531 }
2532 
2533 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2534 {
2535 	int i, r;
2536 
2537 	for (i = 0; i < adev->num_ip_blocks; i++) {
2538 		if (!adev->ip_blocks[i].status.sw)
2539 			continue;
2540 		if (adev->ip_blocks[i].status.hw)
2541 			continue;
2542 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2543 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2544 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2545 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2546 			if (r) {
2547 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2548 					  adev->ip_blocks[i].version->funcs->name, r);
2549 				return r;
2550 			}
2551 			adev->ip_blocks[i].status.hw = true;
2552 		}
2553 	}
2554 
2555 	return 0;
2556 }
2557 
2558 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2559 {
2560 	int i, r;
2561 
2562 	for (i = 0; i < adev->num_ip_blocks; i++) {
2563 		if (!adev->ip_blocks[i].status.sw)
2564 			continue;
2565 		if (adev->ip_blocks[i].status.hw)
2566 			continue;
2567 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2568 		if (r) {
2569 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2570 				  adev->ip_blocks[i].version->funcs->name, r);
2571 			return r;
2572 		}
2573 		adev->ip_blocks[i].status.hw = true;
2574 	}
2575 
2576 	return 0;
2577 }
2578 
2579 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2580 {
2581 	int r = 0;
2582 	int i;
2583 	uint32_t smu_version;
2584 
2585 	if (adev->asic_type >= CHIP_VEGA10) {
2586 		for (i = 0; i < adev->num_ip_blocks; i++) {
2587 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2588 				continue;
2589 
2590 			if (!adev->ip_blocks[i].status.sw)
2591 				continue;
2592 
2593 			/* no need to do the fw loading again if already done*/
2594 			if (adev->ip_blocks[i].status.hw == true)
2595 				break;
2596 
2597 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2598 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2599 				if (r) {
2600 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2601 							  adev->ip_blocks[i].version->funcs->name, r);
2602 					return r;
2603 				}
2604 			} else {
2605 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2606 				if (r) {
2607 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2608 							  adev->ip_blocks[i].version->funcs->name, r);
2609 					return r;
2610 				}
2611 			}
2612 
2613 			adev->ip_blocks[i].status.hw = true;
2614 			break;
2615 		}
2616 	}
2617 
2618 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2619 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2620 
2621 	return r;
2622 }
2623 
/**
 * amdgpu_device_init_schedulers - set up GPU schedulers for all rings
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every ring on the device and initializes a DRM GPU scheduler for
 * each one that needs it, using the per-ring-type job timeout configured
 * on the device.  Also creates the UVD and VCE scheduling entities and
 * updates the XCP partition scheduler lists.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
	long timeout;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* No need to setup the GPU scheduler for rings that don't need it */
		if (!ring || ring->no_scheduler)
			continue;

		/* pick the job timeout matching the ring type */
		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_GFX:
			timeout = adev->gfx_timeout;
			break;
		case AMDGPU_RING_TYPE_COMPUTE:
			timeout = adev->compute_timeout;
			break;
		case AMDGPU_RING_TYPE_SDMA:
			timeout = adev->sdma_timeout;
			break;
		default:
			/* UVD/VCE/VCN/JPEG etc. share the video timeout */
			timeout = adev->video_timeout;
			break;
		}

		/* timeouts are handled on the reset domain's workqueue */
		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
				   DRM_SCHED_PRIORITY_COUNT,
				   ring->num_hw_submission, 0,
				   timeout, adev->reset_domain->wq,
				   ring->sched_score, ring->name,
				   adev->dev);
		if (r) {
			DRM_ERROR("Failed to create scheduler on ring %s.\n",
				  ring->name);
			return r;
		}
		/* UVD/VCE entity init is a no-op on rings that lack those IPs */
		r = amdgpu_uvd_entity_init(adev, ring);
		if (r) {
			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
				  ring->name);
			return r;
		}
		r = amdgpu_vce_entity_init(adev, ring);
		if (r) {
			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
				  ring->name);
			return r;
		}
	}

	amdgpu_xcp_update_partition_sched_list(adev);

	return 0;
}
2680 
2681 
2682 /**
2683  * amdgpu_device_ip_init - run init for hardware IPs
2684  *
2685  * @adev: amdgpu_device pointer
2686  *
2687  * Main initialization pass for hardware IPs.  The list of all the hardware
2688  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2689  * are run.  sw_init initializes the software state associated with each IP
2690  * and hw_init initializes the hardware associated with each IP.
2691  * Returns 0 on success, negative error code on failure.
2692  */
2693 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2694 {
2695 	int i, r;
2696 
2697 	r = amdgpu_ras_init(adev);
2698 	if (r)
2699 		return r;
2700 
2701 	for (i = 0; i < adev->num_ip_blocks; i++) {
2702 		if (!adev->ip_blocks[i].status.valid)
2703 			continue;
2704 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2705 		if (r) {
2706 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2707 				  adev->ip_blocks[i].version->funcs->name, r);
2708 			goto init_failed;
2709 		}
2710 		adev->ip_blocks[i].status.sw = true;
2711 
2712 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2713 			/* need to do common hw init early so everything is set up for gmc */
2714 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2715 			if (r) {
2716 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2717 				goto init_failed;
2718 			}
2719 			adev->ip_blocks[i].status.hw = true;
2720 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2721 			/* need to do gmc hw init early so we can allocate gpu mem */
2722 			/* Try to reserve bad pages early */
2723 			if (amdgpu_sriov_vf(adev))
2724 				amdgpu_virt_exchange_data(adev);
2725 
2726 			r = amdgpu_device_mem_scratch_init(adev);
2727 			if (r) {
2728 				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2729 				goto init_failed;
2730 			}
2731 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2732 			if (r) {
2733 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2734 				goto init_failed;
2735 			}
2736 			r = amdgpu_device_wb_init(adev);
2737 			if (r) {
2738 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2739 				goto init_failed;
2740 			}
2741 			adev->ip_blocks[i].status.hw = true;
2742 
2743 			/* right after GMC hw init, we create CSA */
2744 			if (adev->gfx.mcbp) {
2745 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2746 							       AMDGPU_GEM_DOMAIN_VRAM |
2747 							       AMDGPU_GEM_DOMAIN_GTT,
2748 							       AMDGPU_CSA_SIZE);
2749 				if (r) {
2750 					DRM_ERROR("allocate CSA failed %d\n", r);
2751 					goto init_failed;
2752 				}
2753 			}
2754 
2755 			r = amdgpu_seq64_init(adev);
2756 			if (r) {
2757 				DRM_ERROR("allocate seq64 failed %d\n", r);
2758 				goto init_failed;
2759 			}
2760 		}
2761 	}
2762 
2763 	if (amdgpu_sriov_vf(adev))
2764 		amdgpu_virt_init_data_exchange(adev);
2765 
2766 	r = amdgpu_ib_pool_init(adev);
2767 	if (r) {
2768 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2769 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2770 		goto init_failed;
2771 	}
2772 
2773 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2774 	if (r)
2775 		goto init_failed;
2776 
2777 	r = amdgpu_device_ip_hw_init_phase1(adev);
2778 	if (r)
2779 		goto init_failed;
2780 
2781 	r = amdgpu_device_fw_loading(adev);
2782 	if (r)
2783 		goto init_failed;
2784 
2785 	r = amdgpu_device_ip_hw_init_phase2(adev);
2786 	if (r)
2787 		goto init_failed;
2788 
2789 	/*
2790 	 * retired pages will be loaded from eeprom and reserved here,
2791 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2792 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2793 	 * for I2C communication which only true at this point.
2794 	 *
2795 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2796 	 * failure from bad gpu situation and stop amdgpu init process
2797 	 * accordingly. For other failed cases, it will still release all
2798 	 * the resource and print error message, rather than returning one
2799 	 * negative value to upper level.
2800 	 *
2801 	 * Note: theoretically, this should be called before all vram allocations
2802 	 * to protect retired page from abusing
2803 	 */
2804 	r = amdgpu_ras_recovery_init(adev);
2805 	if (r)
2806 		goto init_failed;
2807 
2808 	/**
2809 	 * In case of XGMI grab extra reference for reset domain for this device
2810 	 */
2811 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2812 		if (amdgpu_xgmi_add_device(adev) == 0) {
2813 			if (!amdgpu_sriov_vf(adev)) {
2814 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2815 
2816 				if (WARN_ON(!hive)) {
2817 					r = -ENOENT;
2818 					goto init_failed;
2819 				}
2820 
2821 				if (!hive->reset_domain ||
2822 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2823 					r = -ENOENT;
2824 					amdgpu_put_xgmi_hive(hive);
2825 					goto init_failed;
2826 				}
2827 
2828 				/* Drop the early temporary reset domain we created for device */
2829 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2830 				adev->reset_domain = hive->reset_domain;
2831 				amdgpu_put_xgmi_hive(hive);
2832 			}
2833 		}
2834 	}
2835 
2836 	r = amdgpu_device_init_schedulers(adev);
2837 	if (r)
2838 		goto init_failed;
2839 
2840 	if (adev->mman.buffer_funcs_ring->sched.ready)
2841 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2842 
2843 	/* Don't init kfd if whole hive need to be reset during init */
2844 	if (!adev->gmc.xgmi.pending_reset) {
2845 		kgd2kfd_init_zone_device(adev);
2846 		amdgpu_amdkfd_device_init(adev);
2847 	}
2848 
2849 	amdgpu_fru_get_product_info(adev);
2850 
2851 init_failed:
2852 
2853 	return r;
2854 }
2855 
2856 /**
2857  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2858  *
2859  * @adev: amdgpu_device pointer
2860  *
2861  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2862  * this function before a GPU reset.  If the value is retained after a
2863  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2864  */
2865 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2866 {
2867 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2868 }
2869 
2870 /**
2871  * amdgpu_device_check_vram_lost - check if vram is valid
2872  *
2873  * @adev: amdgpu_device pointer
2874  *
2875  * Checks the reset magic value written to the gart pointer in VRAM.
2876  * The driver calls this after a GPU reset to see if the contents of
2877  * VRAM is lost or now.
2878  * returns true if vram is lost, false if not.
2879  */
2880 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2881 {
2882 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2883 			AMDGPU_RESET_MAGIC_NUM))
2884 		return true;
2885 
2886 	if (!amdgpu_in_reset(adev))
2887 		return false;
2888 
2889 	/*
2890 	 * For all ASICs with baco/mode1 reset, the VRAM is
2891 	 * always assumed to be lost.
2892 	 */
2893 	switch (amdgpu_asic_reset_method(adev)) {
2894 	case AMD_RESET_METHOD_BACO:
2895 	case AMD_RESET_METHOD_MODE1:
2896 		return true;
2897 	default:
2898 		return false;
2899 	}
2900 }
2901 
2902 /**
2903  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2904  *
2905  * @adev: amdgpu_device pointer
2906  * @state: clockgating state (gate or ungate)
2907  *
2908  * The list of all the hardware IPs that make up the asic is walked and the
2909  * set_clockgating_state callbacks are run.
2910  * Late initialization pass enabling clockgating for hardware IPs.
2911  * Fini or suspend, pass disabling clockgating for hardware IPs.
2912  * Returns 0 on success, negative error code on failure.
2913  */
2914 
2915 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2916 			       enum amd_clockgating_state state)
2917 {
2918 	int i, j, r;
2919 
2920 	if (amdgpu_emu_mode == 1)
2921 		return 0;
2922 
2923 	for (j = 0; j < adev->num_ip_blocks; j++) {
2924 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2925 		if (!adev->ip_blocks[i].status.late_initialized)
2926 			continue;
2927 		/* skip CG for GFX, SDMA on S0ix */
2928 		if (adev->in_s0ix &&
2929 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2930 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2931 			continue;
2932 		/* skip CG for VCE/UVD, it's handled specially */
2933 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2934 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2935 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2936 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2937 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2938 			/* enable clockgating to save power */
2939 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2940 										     state);
2941 			if (r) {
2942 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2943 					  adev->ip_blocks[i].version->funcs->name, r);
2944 				return r;
2945 			}
2946 		}
2947 	}
2948 
2949 	return 0;
2950 }
2951 
2952 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2953 			       enum amd_powergating_state state)
2954 {
2955 	int i, j, r;
2956 
2957 	if (amdgpu_emu_mode == 1)
2958 		return 0;
2959 
2960 	for (j = 0; j < adev->num_ip_blocks; j++) {
2961 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2962 		if (!adev->ip_blocks[i].status.late_initialized)
2963 			continue;
2964 		/* skip PG for GFX, SDMA on S0ix */
2965 		if (adev->in_s0ix &&
2966 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2967 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2968 			continue;
2969 		/* skip CG for VCE/UVD, it's handled specially */
2970 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2971 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2972 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2973 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2974 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2975 			/* enable powergating to save power */
2976 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2977 											state);
2978 			if (r) {
2979 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2980 					  adev->ip_blocks[i].version->funcs->name, r);
2981 				return r;
2982 			}
2983 		}
2984 	}
2985 	return 0;
2986 }
2987 
2988 static int amdgpu_device_enable_mgpu_fan_boost(void)
2989 {
2990 	struct amdgpu_gpu_instance *gpu_ins;
2991 	struct amdgpu_device *adev;
2992 	int i, ret = 0;
2993 
2994 	mutex_lock(&mgpu_info.mutex);
2995 
2996 	/*
2997 	 * MGPU fan boost feature should be enabled
2998 	 * only when there are two or more dGPUs in
2999 	 * the system
3000 	 */
3001 	if (mgpu_info.num_dgpu < 2)
3002 		goto out;
3003 
3004 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3005 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3006 		adev = gpu_ins->adev;
3007 		if (!(adev->flags & AMD_IS_APU) &&
3008 		    !gpu_ins->mgpu_fan_enabled) {
3009 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3010 			if (ret)
3011 				break;
3012 
3013 			gpu_ins->mgpu_fan_enabled = 1;
3014 		}
3015 	}
3016 
3017 out:
3018 	mutex_unlock(&mgpu_info.mutex);
3019 
3020 	return ret;
3021 }
3022 
3023 /**
3024  * amdgpu_device_ip_late_init - run late init for hardware IPs
3025  *
3026  * @adev: amdgpu_device pointer
3027  *
3028  * Late initialization pass for hardware IPs.  The list of all the hardware
3029  * IPs that make up the asic is walked and the late_init callbacks are run.
3030  * late_init covers any special initialization that an IP requires
3031  * after all of the have been initialized or something that needs to happen
3032  * late in the init process.
3033  * Returns 0 on success, negative error code on failure.
3034  */
3035 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3036 {
3037 	struct amdgpu_gpu_instance *gpu_instance;
3038 	int i = 0, r;
3039 
3040 	for (i = 0; i < adev->num_ip_blocks; i++) {
3041 		if (!adev->ip_blocks[i].status.hw)
3042 			continue;
3043 		if (adev->ip_blocks[i].version->funcs->late_init) {
3044 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3045 			if (r) {
3046 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3047 					  adev->ip_blocks[i].version->funcs->name, r);
3048 				return r;
3049 			}
3050 		}
3051 		adev->ip_blocks[i].status.late_initialized = true;
3052 	}
3053 
3054 	r = amdgpu_ras_late_init(adev);
3055 	if (r) {
3056 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3057 		return r;
3058 	}
3059 
3060 	amdgpu_ras_set_error_query_ready(adev, true);
3061 
3062 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3063 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3064 
3065 	amdgpu_device_fill_reset_magic(adev);
3066 
3067 	r = amdgpu_device_enable_mgpu_fan_boost();
3068 	if (r)
3069 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3070 
3071 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3072 	if (amdgpu_passthrough(adev) &&
3073 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3074 	     adev->asic_type == CHIP_ALDEBARAN))
3075 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3076 
3077 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3078 		mutex_lock(&mgpu_info.mutex);
3079 
3080 		/*
3081 		 * Reset device p-state to low as this was booted with high.
3082 		 *
3083 		 * This should be performed only after all devices from the same
3084 		 * hive get initialized.
3085 		 *
3086 		 * However, it's unknown how many device in the hive in advance.
3087 		 * As this is counted one by one during devices initializations.
3088 		 *
3089 		 * So, we wait for all XGMI interlinked devices initialized.
3090 		 * This may bring some delays as those devices may come from
3091 		 * different hives. But that should be OK.
3092 		 */
3093 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3094 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3095 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3096 				if (gpu_instance->adev->flags & AMD_IS_APU)
3097 					continue;
3098 
3099 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3100 						AMDGPU_XGMI_PSTATE_MIN);
3101 				if (r) {
3102 					DRM_ERROR("pstate setting failed (%d).\n", r);
3103 					break;
3104 				}
3105 			}
3106 		}
3107 
3108 		mutex_unlock(&mgpu_info.mutex);
3109 	}
3110 
3111 	return 0;
3112 }
3113 
3114 /**
3115  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3116  *
3117  * @adev: amdgpu_device pointer
3118  *
3119  * For ASICs need to disable SMC first
3120  */
3121 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3122 {
3123 	int i, r;
3124 
3125 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3126 		return;
3127 
3128 	for (i = 0; i < adev->num_ip_blocks; i++) {
3129 		if (!adev->ip_blocks[i].status.hw)
3130 			continue;
3131 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3132 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3133 			/* XXX handle errors */
3134 			if (r) {
3135 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3136 					  adev->ip_blocks[i].version->funcs->name, r);
3137 			}
3138 			adev->ip_blocks[i].status.hw = false;
3139 			break;
3140 		}
3141 	}
3142 }
3143 
/**
 * amdgpu_device_ip_fini_early - run early fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First teardown pass: run the early_fini callbacks, ungate power/clock
 * gating, suspend KFD and then bring down the hardware of every IP block
 * in reverse init order.  Software state is torn down later in
 * amdgpu_device_ip_fini().
 * Returns 0 (per-block fini errors are only logged).
 */
static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].version->funcs->early_fini)
			continue;

		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
		if (r) {
			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
	}

	/* undo the gating applied at late_init time before touching hardware */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	amdgpu_amdkfd_suspend(adev, false);

	/* Workaround for ASICs that need to disable SMC first */
	amdgpu_device_smu_fini_early(adev);

	/* hw_fini in reverse of the init order */
	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_virt_release_full_gpu(adev, false))
			DRM_ERROR("failed to release exclusive mode on fini\n");
	}

	return 0;
}
3188 
3189 /**
3190  * amdgpu_device_ip_fini - run fini for hardware IPs
3191  *
3192  * @adev: amdgpu_device pointer
3193  *
3194  * Main teardown pass for hardware IPs.  The list of all the hardware
3195  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3196  * are run.  hw_fini tears down the hardware associated with each IP
3197  * and sw_fini tears down any software state associated with each IP.
3198  * Returns 0 on success, negative error code on failure.
3199  */
3200 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3201 {
3202 	int i, r;
3203 
3204 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3205 		amdgpu_virt_release_ras_err_handler_data(adev);
3206 
3207 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3208 		amdgpu_xgmi_remove_device(adev);
3209 
3210 	amdgpu_amdkfd_device_fini_sw(adev);
3211 
3212 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3213 		if (!adev->ip_blocks[i].status.sw)
3214 			continue;
3215 
3216 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3217 			amdgpu_ucode_free_bo(adev);
3218 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3219 			amdgpu_device_wb_fini(adev);
3220 			amdgpu_device_mem_scratch_fini(adev);
3221 			amdgpu_ib_pool_fini(adev);
3222 			amdgpu_seq64_fini(adev);
3223 		}
3224 
3225 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3226 		/* XXX handle errors */
3227 		if (r) {
3228 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3229 				  adev->ip_blocks[i].version->funcs->name, r);
3230 		}
3231 		adev->ip_blocks[i].status.sw = false;
3232 		adev->ip_blocks[i].status.valid = false;
3233 	}
3234 
3235 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3236 		if (!adev->ip_blocks[i].status.late_initialized)
3237 			continue;
3238 		if (adev->ip_blocks[i].version->funcs->late_fini)
3239 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3240 		adev->ip_blocks[i].status.late_initialized = false;
3241 	}
3242 
3243 	amdgpu_ras_fini(adev);
3244 
3245 	return 0;
3246 }
3247 
3248 /**
3249  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3250  *
3251  * @work: work_struct.
3252  */
3253 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3254 {
3255 	struct amdgpu_device *adev =
3256 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3257 	int r;
3258 
3259 	r = amdgpu_ib_ring_tests(adev);
3260 	if (r)
3261 		DRM_ERROR("ib ring test failed (%d).\n", r);
3262 }
3263 
/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler enabling GFXOFF
 *
 * @work: work_struct.
 *
 * Runs once the GFXOFF grace delay has expired and asks the SMU to
 * powergate the GFX block; on success the cached gfx_off_state is set.
 */
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	/* this work should only run while GFXOFF is off and unreferenced */
	WARN_ON_ONCE(adev->gfx.gfx_off_state);
	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);

	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
		adev->gfx.gfx_off_state = true;
}
3275 
3276 /**
3277  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3278  *
3279  * @adev: amdgpu_device pointer
3280  *
3281  * Main suspend function for hardware IPs.  The list of all the hardware
3282  * IPs that make up the asic is walked, clockgating is disabled and the
3283  * suspend callbacks are run.  suspend puts the hardware and software state
3284  * in each IP into a state suitable for suspend.
3285  * Returns 0 on success, negative error code on failure.
3286  */
3287 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3288 {
3289 	int i, r;
3290 
3291 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3292 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3293 
3294 	/*
3295 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3296 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3297 	 * scenario. Add the missing df cstate disablement here.
3298 	 */
3299 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3300 		dev_warn(adev->dev, "Failed to disallow df cstate");
3301 
3302 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3303 		if (!adev->ip_blocks[i].status.valid)
3304 			continue;
3305 
3306 		/* displays are handled separately */
3307 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3308 			continue;
3309 
3310 		/* XXX handle errors */
3311 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3312 		/* XXX handle errors */
3313 		if (r) {
3314 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3315 				  adev->ip_blocks[i].version->funcs->name, r);
3316 			return r;
3317 		}
3318 
3319 		adev->ip_blocks[i].status.hw = false;
3320 	}
3321 
3322 	return 0;
3323 }
3324 
3325 /**
3326  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3327  *
3328  * @adev: amdgpu_device pointer
3329  *
3330  * Main suspend function for hardware IPs.  The list of all the hardware
3331  * IPs that make up the asic is walked, clockgating is disabled and the
3332  * suspend callbacks are run.  suspend puts the hardware and software state
3333  * in each IP into a state suitable for suspend.
3334  * Returns 0 on success, negative error code on failure.
3335  */
3336 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3337 {
3338 	int i, r;
3339 
3340 	if (adev->in_s0ix)
3341 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3342 
3343 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3344 		if (!adev->ip_blocks[i].status.valid)
3345 			continue;
3346 		/* displays are handled in phase1 */
3347 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3348 			continue;
3349 		/* PSP lost connection when err_event_athub occurs */
3350 		if (amdgpu_ras_intr_triggered() &&
3351 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3352 			adev->ip_blocks[i].status.hw = false;
3353 			continue;
3354 		}
3355 
3356 		/* skip unnecessary suspend if we do not initialize them yet */
3357 		if (adev->gmc.xgmi.pending_reset &&
3358 		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3359 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3360 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3361 		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3362 			adev->ip_blocks[i].status.hw = false;
3363 			continue;
3364 		}
3365 
3366 		/* skip suspend of gfx/mes and psp for S0ix
3367 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3368 		 * like at runtime. PSP is also part of the always on hardware
3369 		 * so no need to suspend it.
3370 		 */
3371 		if (adev->in_s0ix &&
3372 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3373 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3374 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3375 			continue;
3376 
3377 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3378 		if (adev->in_s0ix &&
3379 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3380 		     IP_VERSION(5, 0, 0)) &&
3381 		    (adev->ip_blocks[i].version->type ==
3382 		     AMD_IP_BLOCK_TYPE_SDMA))
3383 			continue;
3384 
3385 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3386 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3387 		 * from this location and RLC Autoload automatically also gets loaded
3388 		 * from here based on PMFW -> PSP message during re-init sequence.
3389 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3390 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3391 		 */
3392 		if (amdgpu_in_reset(adev) &&
3393 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3394 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3395 			continue;
3396 
3397 		/* XXX handle errors */
3398 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3399 		/* XXX handle errors */
3400 		if (r) {
3401 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3402 				  adev->ip_blocks[i].version->funcs->name, r);
3403 		}
3404 		adev->ip_blocks[i].status.hw = false;
3405 		/* handle putting the SMC in the appropriate state */
3406 		if (!amdgpu_sriov_vf(adev)) {
3407 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3408 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3409 				if (r) {
3410 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3411 							adev->mp1_state, r);
3412 					return r;
3413 				}
3414 			}
3415 		}
3416 	}
3417 
3418 	return 0;
3419 }
3420 
3421 /**
3422  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3423  *
3424  * @adev: amdgpu_device pointer
3425  *
3426  * Main suspend function for hardware IPs.  The list of all the hardware
3427  * IPs that make up the asic is walked, clockgating is disabled and the
3428  * suspend callbacks are run.  suspend puts the hardware and software state
3429  * in each IP into a state suitable for suspend.
3430  * Returns 0 on success, negative error code on failure.
3431  */
3432 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3433 {
3434 	int r;
3435 
3436 	if (amdgpu_sriov_vf(adev)) {
3437 		amdgpu_virt_fini_data_exchange(adev);
3438 		amdgpu_virt_request_full_gpu(adev, false);
3439 	}
3440 
3441 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3442 
3443 	r = amdgpu_device_ip_suspend_phase1(adev);
3444 	if (r)
3445 		return r;
3446 	r = amdgpu_device_ip_suspend_phase2(adev);
3447 
3448 	if (amdgpu_sriov_vf(adev))
3449 		amdgpu_virt_release_full_gpu(adev, false);
3450 
3451 	return r;
3452 }
3453 
/**
 * amdgpu_device_ip_reinit_early_sriov - re-init basic IPs after a VF reset
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs hw_init for the COMMON, GMC, PSP and IH blocks of an SR-IOV VF.
 * Blocks are visited in adev->ip_blocks order (each one matched against
 * the ip_order list), and the hw flag of every block - matching or not -
 * is cleared first so the later full re-init pass sees a clean slate.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
{
	int i, r;

	/* only these block types are brought up in the early pass */
	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_COMMON,
		AMD_IP_BLOCK_TYPE_GMC,
		AMD_IP_BLOCK_TYPE_PSP,
		AMD_IP_BLOCK_TYPE_IH,
	};

	for (i = 0; i < adev->num_ip_blocks; i++) {
		int j;
		struct amdgpu_ip_block *block;

		block = &adev->ip_blocks[i];
		/* mark every block down, including ones not re-inited here */
		block->status.hw = false;

		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {

			if (block->version->type != ip_order[j] ||
				!block->status.valid)
				continue;

			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}
3488 
/**
 * amdgpu_device_ip_reinit_late_sriov - re-init remaining IPs after a VF reset
 *
 * @adev: amdgpu_device pointer
 *
 * Second SR-IOV re-init pass.  Blocks are brought up strictly in the
 * ip_order sequence below (SMC first, media engines last), skipping any
 * block that is invalid or already up.  The SMC block uses its resume
 * callback instead of hw_init; all other blocks use hw_init.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_SMC,
		AMD_IP_BLOCK_TYPE_DCE,
		AMD_IP_BLOCK_TYPE_GFX,
		AMD_IP_BLOCK_TYPE_SDMA,
		AMD_IP_BLOCK_TYPE_MES,
		AMD_IP_BLOCK_TYPE_UVD,
		AMD_IP_BLOCK_TYPE_VCE,
		AMD_IP_BLOCK_TYPE_VCN,
		AMD_IP_BLOCK_TYPE_JPEG
	};

	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
		int j;
		struct amdgpu_ip_block *block;

		for (j = 0; j < adev->num_ip_blocks; j++) {
			block = &adev->ip_blocks[j];

			if (block->version->type != ip_order[i] ||
				!block->status.valid ||
				block->status.hw)
				continue;

			/* SMC is resumed rather than re-inited - presumably it
			 * keeps state across the VF reset; verify if changing.
			 */
			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
				r = block->version->funcs->resume(adev);
			else
				r = block->version->funcs->hw_init(adev);

			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}
3531 
3532 /**
3533  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3534  *
3535  * @adev: amdgpu_device pointer
3536  *
3537  * First resume function for hardware IPs.  The list of all the hardware
3538  * IPs that make up the asic is walked and the resume callbacks are run for
3539  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3540  * after a suspend and updates the software state as necessary.  This
3541  * function is also used for restoring the GPU after a GPU reset.
3542  * Returns 0 on success, negative error code on failure.
3543  */
3544 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3545 {
3546 	int i, r;
3547 
3548 	for (i = 0; i < adev->num_ip_blocks; i++) {
3549 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3550 			continue;
3551 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3552 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3553 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3554 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3555 
3556 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3557 			if (r) {
3558 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3559 					  adev->ip_blocks[i].version->funcs->name, r);
3560 				return r;
3561 			}
3562 			adev->ip_blocks[i].status.hw = true;
3563 		}
3564 	}
3565 
3566 	return 0;
3567 }
3568 
3569 /**
3570  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3571  *
3572  * @adev: amdgpu_device pointer
3573  *
3574  * First resume function for hardware IPs.  The list of all the hardware
3575  * IPs that make up the asic is walked and the resume callbacks are run for
3576  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3577  * functional state after a suspend and updates the software state as
3578  * necessary.  This function is also used for restoring the GPU after a GPU
3579  * reset.
3580  * Returns 0 on success, negative error code on failure.
3581  */
3582 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3583 {
3584 	int i, r;
3585 
3586 	for (i = 0; i < adev->num_ip_blocks; i++) {
3587 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3588 			continue;
3589 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3590 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3591 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3592 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3593 			continue;
3594 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3595 		if (r) {
3596 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3597 				  adev->ip_blocks[i].version->funcs->name, r);
3598 			return r;
3599 		}
3600 		adev->ip_blocks[i].status.hw = true;
3601 	}
3602 
3603 	return 0;
3604 }
3605 
3606 /**
3607  * amdgpu_device_ip_resume - run resume for hardware IPs
3608  *
3609  * @adev: amdgpu_device pointer
3610  *
3611  * Main resume function for hardware IPs.  The hardware IPs
3612  * are split into two resume functions because they are
3613  * also used in recovering from a GPU reset and some additional
3614  * steps need to be take between them.  In this case (S3/S4) they are
3615  * run sequentially.
3616  * Returns 0 on success, negative error code on failure.
3617  */
3618 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3619 {
3620 	int r;
3621 
3622 	r = amdgpu_device_ip_resume_phase1(adev);
3623 	if (r)
3624 		return r;
3625 
3626 	r = amdgpu_device_fw_loading(adev);
3627 	if (r)
3628 		return r;
3629 
3630 	r = amdgpu_device_ip_resume_phase2(adev);
3631 
3632 	if (adev->mman.buffer_funcs_ring->sched.ready)
3633 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3634 
3635 	return r;
3636 }
3637 
3638 /**
3639  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3640  *
3641  * @adev: amdgpu_device pointer
3642  *
3643  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3644  */
3645 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3646 {
3647 	if (amdgpu_sriov_vf(adev)) {
3648 		if (adev->is_atom_fw) {
3649 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3650 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3651 		} else {
3652 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3653 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3654 		}
3655 
3656 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3657 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3658 	}
3659 }
3660 
3661 /**
3662  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3663  *
3664  * @asic_type: AMD asic type
3665  *
3666  * Check if there is DC (new modesetting infrastructre) support for an asic.
3667  * returns true if DC has support, false if not.
3668  */
3669 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3670 {
3671 	switch (asic_type) {
3672 #ifdef CONFIG_DRM_AMDGPU_SI
3673 	case CHIP_HAINAN:
3674 #endif
3675 	case CHIP_TOPAZ:
3676 		/* chips with no display hardware */
3677 		return false;
3678 #if defined(CONFIG_DRM_AMD_DC)
3679 	case CHIP_TAHITI:
3680 	case CHIP_PITCAIRN:
3681 	case CHIP_VERDE:
3682 	case CHIP_OLAND:
3683 		/*
3684 		 * We have systems in the wild with these ASICs that require
3685 		 * LVDS and VGA support which is not supported with DC.
3686 		 *
3687 		 * Fallback to the non-DC driver here by default so as not to
3688 		 * cause regressions.
3689 		 */
3690 #if defined(CONFIG_DRM_AMD_DC_SI)
3691 		return amdgpu_dc > 0;
3692 #else
3693 		return false;
3694 #endif
3695 	case CHIP_BONAIRE:
3696 	case CHIP_KAVERI:
3697 	case CHIP_KABINI:
3698 	case CHIP_MULLINS:
3699 		/*
3700 		 * We have systems in the wild with these ASICs that require
3701 		 * VGA support which is not supported with DC.
3702 		 *
3703 		 * Fallback to the non-DC driver here by default so as not to
3704 		 * cause regressions.
3705 		 */
3706 		return amdgpu_dc > 0;
3707 	default:
3708 		return amdgpu_dc != 0;
3709 #else
3710 	default:
3711 		if (amdgpu_dc > 0)
3712 			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3713 		return false;
3714 #endif
3715 	}
3716 }
3717 
3718 /**
3719  * amdgpu_device_has_dc_support - check if dc is supported
3720  *
3721  * @adev: amdgpu_device pointer
3722  *
3723  * Returns true for supported, false for not supported
3724  */
3725 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3726 {
3727 	if (adev->enable_virtual_display ||
3728 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3729 		return false;
3730 
3731 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3732 }
3733 
/**
 * amdgpu_device_xgmi_reset_func - reset one device of an XGMI hive
 *
 * @__work: work_struct embedded in the amdgpu_device (xgmi_reset_work)
 *
 * Work handler performing a hive-synchronized ASIC reset.  BACO resets
 * stage the enter/exit steps behind barriers so every hive member
 * transitions together; other reset methods use one full barrier before
 * resetting.  The outcome is recorded in adev->asic_reset_res.
 */
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
	struct amdgpu_device *adev =
		container_of(__work, struct amdgpu_device, xgmi_reset_work);
	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

	/* It's a bug to not have a hive within this function */
	if (WARN_ON(!hive))
		return;

	/*
	 * Use task barrier to synchronize all xgmi reset works across the
	 * hive. task_barrier_enter and task_barrier_exit will block
	 * until all the threads running the xgmi reset works reach
	 * those points. task_barrier_full will do both blocks.
	 */
	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {

		task_barrier_enter(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));

		/* NOTE(review): bailing out here skips task_barrier_exit -
		 * other hive members waiting on the barrier may stall; verify.
		 */
		if (adev->asic_reset_res)
			goto fail;

		task_barrier_exit(&hive->tb);
		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));

		if (adev->asic_reset_res)
			goto fail;

		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
	} else {

		task_barrier_full(&hive->tb);
		adev->asic_reset_res =  amdgpu_asic_reset(adev);
	}

fail:
	if (adev->asic_reset_res)
		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
			 adev->asic_reset_res, adev_to_drm(adev)->unique);
	amdgpu_put_xgmi_hive(hive);
}
3777 
/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup timeout parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the amdgpu_lockup_timeout module parameter, a comma-separated
 * list of millisecond values applied positionally: gfx, compute, sdma,
 * video.  A zero entry keeps the default for that position; a negative
 * entry disables the timeout entirely (and taints the kernel).  When
 * exactly one value is given it applies to all non-compute rings, and
 * also to compute under SR-IOV/passthrough.
 * Returns 0 on success, negative error code on malformed input.
 */
static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
	char *input = amdgpu_lockup_timeout;
	char *timeout_setting = NULL;
	int index = 0;
	long timeout;
	int ret = 0;

	/*
	 * By default timeout for non compute jobs is 10000
	 * and 60000 for compute jobs.
	 * In SR-IOV or passthrough mode, timeout for compute
	 * jobs are 60000 by default.
	 */
	adev->gfx_timeout = msecs_to_jiffies(10000);
	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
	if (amdgpu_sriov_vf(adev))
		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
	else
		adev->compute_timeout =  msecs_to_jiffies(60000);

	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
		while ((timeout_setting = strsep(&input, ",")) &&
				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
			ret = kstrtol(timeout_setting, 0, &timeout);
			if (ret)
				return ret;

			if (timeout == 0) {
				/* zero keeps the default for this position */
				index++;
				continue;
			} else if (timeout < 0) {
				/* negative disables the timeout completely */
				timeout = MAX_SCHEDULE_TIMEOUT;
				dev_warn(adev->dev, "lockup timeout disabled");
				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
			} else {
				timeout = msecs_to_jiffies(timeout);
			}

			/* positional mapping: gfx, compute, sdma, video */
			switch (index++) {
			case 0:
				adev->gfx_timeout = timeout;
				break;
			case 1:
				adev->compute_timeout = timeout;
				break;
			case 2:
				adev->sdma_timeout = timeout;
				break;
			case 3:
				adev->video_timeout = timeout;
				break;
			default:
				break;
			}
		}
		/*
		 * There is only one value specified and
		 * it should apply to all non-compute jobs.
		 */
		if (index == 1) {
			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
				adev->compute_timeout = adev->gfx_timeout;
		}
	}

	return ret;
}
3848 
3849 /**
3850  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3851  *
3852  * @adev: amdgpu_device pointer
3853  *
3854  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3855  */
3856 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3857 {
3858 	struct iommu_domain *domain;
3859 
3860 	domain = iommu_get_domain_for_dev(adev->dev);
3861 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3862 		adev->ram_is_direct_mapped = true;
3863 }
3864 
/* sysfs attributes published for every amdgpu device; NULL-terminated */
static const struct attribute *amdgpu_dev_attributes[] = {
	&dev_attr_pcie_replay_count.attr,
	NULL
};
3869 
3870 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3871 {
3872 	if (amdgpu_mcbp == 1)
3873 		adev->gfx.mcbp = true;
3874 	else if (amdgpu_mcbp == 0)
3875 		adev->gfx.mcbp = false;
3876 
3877 	if (amdgpu_sriov_vf(adev))
3878 		adev->gfx.mcbp = true;
3879 
3880 	if (adev->gfx.mcbp)
3881 		DRM_INFO("MCBP is enabled\n");
3882 }
3883 
3884 /**
3885  * amdgpu_device_init - initialize the driver
3886  *
3887  * @adev: amdgpu_device pointer
3888  * @flags: driver flags
3889  *
3890  * Initializes the driver info and hw (all asics).
3891  * Returns 0 for success or an error on failure.
3892  * Called at driver startup.
3893  */
3894 int amdgpu_device_init(struct amdgpu_device *adev,
3895 		       uint32_t flags)
3896 {
3897 	struct drm_device *ddev = adev_to_drm(adev);
3898 	struct pci_dev *pdev = adev->pdev;
3899 	int r, i;
3900 	bool px = false;
3901 	u32 max_MBps;
3902 	int tmp;
3903 
3904 	adev->shutdown = false;
3905 	adev->flags = flags;
3906 
3907 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3908 		adev->asic_type = amdgpu_force_asic_type;
3909 	else
3910 		adev->asic_type = flags & AMD_ASIC_MASK;
3911 
3912 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3913 	if (amdgpu_emu_mode == 1)
3914 		adev->usec_timeout *= 10;
3915 	adev->gmc.gart_size = 512 * 1024 * 1024;
3916 	adev->accel_working = false;
3917 	adev->num_rings = 0;
3918 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3919 	adev->mman.buffer_funcs = NULL;
3920 	adev->mman.buffer_funcs_ring = NULL;
3921 	adev->vm_manager.vm_pte_funcs = NULL;
3922 	adev->vm_manager.vm_pte_num_scheds = 0;
3923 	adev->gmc.gmc_funcs = NULL;
3924 	adev->harvest_ip_mask = 0x0;
3925 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3926 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3927 
3928 	adev->smc_rreg = &amdgpu_invalid_rreg;
3929 	adev->smc_wreg = &amdgpu_invalid_wreg;
3930 	adev->pcie_rreg = &amdgpu_invalid_rreg;
3931 	adev->pcie_wreg = &amdgpu_invalid_wreg;
3932 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3933 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
3934 	adev->pciep_rreg = &amdgpu_invalid_rreg;
3935 	adev->pciep_wreg = &amdgpu_invalid_wreg;
3936 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3937 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3938 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3939 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
3940 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3941 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3942 	adev->didt_rreg = &amdgpu_invalid_rreg;
3943 	adev->didt_wreg = &amdgpu_invalid_wreg;
3944 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3945 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3946 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3947 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3948 
3949 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3950 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3951 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3952 
3953 	/* mutex initialization are all done here so we
3954 	 * can recall function without having locking issues
3955 	 */
3956 	mutex_init(&adev->firmware.mutex);
3957 	mutex_init(&adev->pm.mutex);
3958 	mutex_init(&adev->gfx.gpu_clock_mutex);
3959 	mutex_init(&adev->srbm_mutex);
3960 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3961 	mutex_init(&adev->gfx.gfx_off_mutex);
3962 	mutex_init(&adev->gfx.partition_mutex);
3963 	mutex_init(&adev->grbm_idx_mutex);
3964 	mutex_init(&adev->mn_lock);
3965 	mutex_init(&adev->virt.vf_errors.lock);
3966 	hash_init(adev->mn_hash);
3967 	mutex_init(&adev->psp.mutex);
3968 	mutex_init(&adev->notifier_lock);
3969 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3970 	mutex_init(&adev->benchmark_mutex);
3971 
3972 	amdgpu_device_init_apu_flags(adev);
3973 
3974 	r = amdgpu_device_check_arguments(adev);
3975 	if (r)
3976 		return r;
3977 
3978 	spin_lock_init(&adev->mmio_idx_lock);
3979 	spin_lock_init(&adev->smc_idx_lock);
3980 	spin_lock_init(&adev->pcie_idx_lock);
3981 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3982 	spin_lock_init(&adev->didt_idx_lock);
3983 	spin_lock_init(&adev->gc_cac_idx_lock);
3984 	spin_lock_init(&adev->se_cac_idx_lock);
3985 	spin_lock_init(&adev->audio_endpt_idx_lock);
3986 	spin_lock_init(&adev->mm_stats.lock);
3987 
3988 	INIT_LIST_HEAD(&adev->shadow_list);
3989 	mutex_init(&adev->shadow_list_lock);
3990 
3991 	INIT_LIST_HEAD(&adev->reset_list);
3992 
3993 	INIT_LIST_HEAD(&adev->ras_list);
3994 
3995 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3996 
3997 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3998 			  amdgpu_device_delayed_init_work_handler);
3999 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4000 			  amdgpu_device_delay_enable_gfx_off);
4001 
4002 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4003 
4004 	adev->gfx.gfx_off_req_count = 1;
4005 	adev->gfx.gfx_off_residency = 0;
4006 	adev->gfx.gfx_off_entrycount = 0;
4007 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4008 
4009 	atomic_set(&adev->throttling_logging_enabled, 1);
4010 	/*
4011 	 * If throttling continues, logging will be performed every minute
4012 	 * to avoid log flooding. "-1" is subtracted since the thermal
4013 	 * throttling interrupt comes every second. Thus, the total logging
 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4015 	 * for throttling interrupt) = 60 seconds.
4016 	 */
4017 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4018 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4019 
4020 	/* Registers mapping */
4021 	/* TODO: block userspace mapping of io register */
4022 	if (adev->asic_type >= CHIP_BONAIRE) {
4023 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4024 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4025 	} else {
4026 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4027 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4028 	}
4029 
4030 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4031 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4032 
4033 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4034 	if (!adev->rmmio)
4035 		return -ENOMEM;
4036 
4037 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4038 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4039 
4040 	/*
4041 	 * Reset domain needs to be present early, before XGMI hive discovered
 * (if any) and initialized to use the reset sem and in_gpu reset flag
4043 	 * early on during init and before calling to RREG32.
4044 	 */
4045 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4046 	if (!adev->reset_domain)
4047 		return -ENOMEM;
4048 
4049 	/* detect hw virtualization here */
4050 	amdgpu_detect_virtualization(adev);
4051 
4052 	amdgpu_device_get_pcie_info(adev);
4053 
4054 	r = amdgpu_device_get_job_timeout_settings(adev);
4055 	if (r) {
4056 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4057 		return r;
4058 	}
4059 
4060 	amdgpu_device_set_mcbp(adev);
4061 
4062 	/* early init functions */
4063 	r = amdgpu_device_ip_early_init(adev);
4064 	if (r)
4065 		return r;
4066 
4067 	/* Get rid of things like offb */
4068 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4069 	if (r)
4070 		return r;
4071 
4072 	/* Enable TMZ based on IP_VERSION */
4073 	amdgpu_gmc_tmz_set(adev);
4074 
4075 	amdgpu_gmc_noretry_set(adev);
4076 	/* Need to get xgmi info early to decide the reset behavior*/
4077 	if (adev->gmc.xgmi.supported) {
4078 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4079 		if (r)
4080 			return r;
4081 	}
4082 
4083 	/* enable PCIE atomic ops */
4084 	if (amdgpu_sriov_vf(adev)) {
4085 		if (adev->virt.fw_reserve.p_pf2vf)
4086 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4087 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4088 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	/* APUs w/ gfx9 onwards don't rely on PCIe atomics; rather, an
	 * internal path natively supports atomics, so set have_atomics_support to true.
	 */
4092 	} else if ((adev->flags & AMD_IS_APU) &&
4093 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4094 		    IP_VERSION(9, 0, 0))) {
4095 		adev->have_atomics_support = true;
4096 	} else {
4097 		adev->have_atomics_support =
4098 			!pci_enable_atomic_ops_to_root(adev->pdev,
4099 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4100 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4101 	}
4102 
4103 	if (!adev->have_atomics_support)
4104 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4105 
4106 	/* doorbell bar mapping and doorbell index init*/
4107 	amdgpu_doorbell_init(adev);
4108 
4109 	if (amdgpu_emu_mode == 1) {
4110 		/* post the asic on emulation mode */
4111 		emu_soc_asic_init(adev);
4112 		goto fence_driver_init;
4113 	}
4114 
4115 	amdgpu_reset_init(adev);
4116 
4117 	/* detect if we are with an SRIOV vbios */
4118 	if (adev->bios)
4119 		amdgpu_device_detect_sriov_bios(adev);
4120 
4121 	/* check if we need to reset the asic
4122 	 *  E.g., driver was not cleanly unloaded previously, etc.
4123 	 */
4124 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4125 		if (adev->gmc.xgmi.num_physical_nodes) {
4126 			dev_info(adev->dev, "Pending hive reset.\n");
4127 			adev->gmc.xgmi.pending_reset = true;
4128 			/* Only need to init necessary block for SMU to handle the reset */
4129 			for (i = 0; i < adev->num_ip_blocks; i++) {
4130 				if (!adev->ip_blocks[i].status.valid)
4131 					continue;
4132 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4133 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4134 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4135 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4136 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4137 						adev->ip_blocks[i].version->funcs->name);
4138 					adev->ip_blocks[i].status.hw = true;
4139 				}
4140 			}
4141 		} else {
4142 			tmp = amdgpu_reset_method;
4143 			/* It should do a default reset when loading or reloading the driver,
4144 			 * regardless of the module parameter reset_method.
4145 			 */
4146 			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4147 			r = amdgpu_asic_reset(adev);
4148 			amdgpu_reset_method = tmp;
4149 			if (r) {
4150 				dev_err(adev->dev, "asic reset on init failed\n");
4151 				goto failed;
4152 			}
4153 		}
4154 	}
4155 
4156 	/* Post card if necessary */
4157 	if (amdgpu_device_need_post(adev)) {
4158 		if (!adev->bios) {
4159 			dev_err(adev->dev, "no vBIOS found\n");
4160 			r = -EINVAL;
4161 			goto failed;
4162 		}
4163 		DRM_INFO("GPU posting now...\n");
4164 		r = amdgpu_device_asic_init(adev);
4165 		if (r) {
4166 			dev_err(adev->dev, "gpu post error!\n");
4167 			goto failed;
4168 		}
4169 	}
4170 
4171 	if (adev->bios) {
4172 		if (adev->is_atom_fw) {
4173 			/* Initialize clocks */
4174 			r = amdgpu_atomfirmware_get_clock_info(adev);
4175 			if (r) {
4176 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4177 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4178 				goto failed;
4179 			}
4180 		} else {
4181 			/* Initialize clocks */
4182 			r = amdgpu_atombios_get_clock_info(adev);
4183 			if (r) {
4184 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4185 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4186 				goto failed;
4187 			}
4188 			/* init i2c buses */
4189 			if (!amdgpu_device_has_dc_support(adev))
4190 				amdgpu_atombios_i2c_init(adev);
4191 		}
4192 	}
4193 
4194 fence_driver_init:
4195 	/* Fence driver */
4196 	r = amdgpu_fence_driver_sw_init(adev);
4197 	if (r) {
4198 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4199 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4200 		goto failed;
4201 	}
4202 
4203 	/* init the mode config */
4204 	drm_mode_config_init(adev_to_drm(adev));
4205 
4206 	r = amdgpu_device_ip_init(adev);
4207 	if (r) {
4208 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4209 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4210 		goto release_ras_con;
4211 	}
4212 
4213 	amdgpu_fence_driver_hw_init(adev);
4214 
4215 	dev_info(adev->dev,
4216 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4217 			adev->gfx.config.max_shader_engines,
4218 			adev->gfx.config.max_sh_per_se,
4219 			adev->gfx.config.max_cu_per_sh,
4220 			adev->gfx.cu_info.number);
4221 
4222 	adev->accel_working = true;
4223 
4224 	amdgpu_vm_check_compute_bug(adev);
4225 
4226 	/* Initialize the buffer migration limit. */
4227 	if (amdgpu_moverate >= 0)
4228 		max_MBps = amdgpu_moverate;
4229 	else
4230 		max_MBps = 8; /* Allow 8 MB/s. */
4231 	/* Get a log2 for easy divisions. */
4232 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4233 
4234 	/*
4235 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4236 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4237 	 * gpu instance is counted less.
4238 	 */
4239 	amdgpu_register_gpu_instance(adev);
4240 
4241 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4242 	 * explicit gating rather than handling it automatically.
4243 	 */
4244 	if (!adev->gmc.xgmi.pending_reset) {
4245 		r = amdgpu_device_ip_late_init(adev);
4246 		if (r) {
4247 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4248 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4249 			goto release_ras_con;
4250 		}
4251 		/* must succeed. */
4252 		amdgpu_ras_resume(adev);
4253 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4254 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4255 	}
4256 
4257 	if (amdgpu_sriov_vf(adev)) {
4258 		amdgpu_virt_release_full_gpu(adev, true);
4259 		flush_delayed_work(&adev->delayed_init_work);
4260 	}
4261 
4262 	/*
4263 	 * Place those sysfs registering after `late_init`. As some of those
4264 	 * operations performed in `late_init` might affect the sysfs
4265 	 * interfaces creating.
4266 	 */
4267 	r = amdgpu_atombios_sysfs_init(adev);
4268 	if (r)
4269 		drm_err(&adev->ddev,
4270 			"registering atombios sysfs failed (%d).\n", r);
4271 
4272 	r = amdgpu_pm_sysfs_init(adev);
4273 	if (r)
4274 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4275 
4276 	r = amdgpu_ucode_sysfs_init(adev);
4277 	if (r) {
4278 		adev->ucode_sysfs_en = false;
4279 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4280 	} else
4281 		adev->ucode_sysfs_en = true;
4282 
4283 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4284 	if (r)
4285 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4286 
4287 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4288 	if (r)
4289 		dev_err(adev->dev,
4290 			"Could not create amdgpu board attributes\n");
4291 
4292 	amdgpu_fru_sysfs_init(adev);
4293 	amdgpu_reg_state_sysfs_init(adev);
4294 
4295 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4296 		r = amdgpu_pmu_init(adev);
4297 	if (r)
4298 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4299 
4300 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4301 	if (amdgpu_device_cache_pci_state(adev->pdev))
4302 		pci_restore_state(pdev);
4303 
4304 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4305 	/* this will fail for cards that aren't VGA class devices, just
4306 	 * ignore it
4307 	 */
4308 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4309 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4310 
4311 	px = amdgpu_device_supports_px(ddev);
4312 
4313 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4314 				apple_gmux_detect(NULL, NULL)))
4315 		vga_switcheroo_register_client(adev->pdev,
4316 					       &amdgpu_switcheroo_ops, px);
4317 
4318 	if (px)
4319 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4320 
4321 	if (adev->gmc.xgmi.pending_reset)
4322 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4323 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4324 
4325 	amdgpu_device_check_iommu_direct_map(adev);
4326 
4327 	return 0;
4328 
4329 release_ras_con:
4330 	if (amdgpu_sriov_vf(adev))
4331 		amdgpu_virt_release_full_gpu(adev, true);
4332 
4333 	/* failed in exclusive mode due to timeout */
4334 	if (amdgpu_sriov_vf(adev) &&
4335 		!amdgpu_sriov_runtime(adev) &&
4336 		amdgpu_virt_mmio_blocked(adev) &&
4337 		!amdgpu_virt_wait_reset(adev)) {
4338 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4339 		/* Don't send request since VF is inactive. */
4340 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4341 		adev->virt.ops = NULL;
4342 		r = -EAGAIN;
4343 	}
4344 	amdgpu_release_ras_context(adev);
4345 
4346 failed:
4347 	amdgpu_vf_error_trans_all(adev);
4348 
4349 	return r;
4350 }
4351 
/**
 * amdgpu_device_unmap_mmio - tear down all CPU-visible MMIO mappings
 *
 * @adev: amdgpu_device pointer
 *
 * Removes every CPU mapping of the device BARs (doorbells, registers and
 * the VRAM aperture), both userspace mmaps and kernel ioremaps.  Called
 * from amdgpu_device_fini_hw() when the DRM device has been unplugged,
 * so nothing can touch stale MMIO afterwards.
 */
static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
{

	/* Clear all CPU mappings pointing to this device */
	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);

	/* Unmap all mapped bars - Doorbell, registers and VRAM */
	amdgpu_doorbell_fini(adev);

	/* Kernel mapping of the register BAR */
	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	/* Kernel mapping of the visible VRAM aperture, if one was set up */
	if (adev->mman.aper_base_kaddr)
		iounmap(adev->mman.aper_base_kaddr);
	adev->mman.aper_base_kaddr = NULL;

	/* Memory manager related */
	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
		/* Release the write-combine MTRR / memtype set up for VRAM */
		arch_phys_wc_del(adev->gmc.vram_mtrr);
		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
	}
}
4373 
4374 /**
4375  * amdgpu_device_fini_hw - tear down the driver
4376  *
4377  * @adev: amdgpu_device pointer
4378  *
4379  * Tear down the driver info (all asics).
4380  * Called at driver shutdown.
4381  */
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
	dev_info(adev->dev, "amdgpu: finishing device.\n");
	/* Let any in-flight delayed init complete before tearing down */
	flush_delayed_work(&adev->delayed_init_work);
	adev->shutdown = true;

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized) {
		/* Quiesce displays; path depends on whether the driver uses
		 * atomic modesetting.
		 */
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
			drm_helper_force_disable_all(adev_to_drm(adev));
		else
			drm_atomic_helper_shutdown(adev_to_drm(adev));
	}
	amdgpu_fence_driver_hw_fini(adev);

	/* Flush pending TTM work before removing sysfs interfaces */
	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);

	/* Remove the sysfs files registered during init, mirroring the
	 * *_sysfs_init() calls there.
	 */
	if (adev->pm.sysfs_initialized)
		amdgpu_pm_sysfs_fini(adev);
	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);
	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
	amdgpu_fru_sysfs_fini(adev);

	amdgpu_reg_state_sysfs_fini(adev);

	/* disable ras feature must before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	amdgpu_device_ip_fini_early(adev);

	amdgpu_irq_fini_hw(adev);

	/* Break DMA mappings while the device is still reachable */
	if (adev->mman.initialized)
		ttm_device_clear_dma_mappings(&adev->mman.bdev);

	amdgpu_gart_dummy_page_fini(adev);

	/* On hot-unplug the BARs are gone; drop every CPU mapping of them */
	if (drm_dev_is_unplugged(adev_to_drm(adev)))
		amdgpu_device_unmap_mmio(adev);

}
4436 
/**
 * amdgpu_device_fini_sw - tear down driver software state
 *
 * @adev: amdgpu_device pointer
 *
 * Frees software state created during device init: fence driver, IP
 * blocks, firmware/BIOS data, vga and switcheroo registrations, MMIO
 * mappings and the reset domain.
 */
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
	int idx;
	bool px;

	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_device_ip_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	/* Drop the stub gang-submit fence installed during init */
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));

	amdgpu_reset_fini(adev);

	/* free i2c buses */
	if (!amdgpu_device_has_dc_support(adev))
		amdgpu_i2c_fini(adev);

	/* Emulation mode never ran atombios init, so skip its fini */
	if (amdgpu_emu_mode != 1)
		amdgpu_atombios_fini(adev);

	kfree(adev->bios);
	adev->bios = NULL;

	kfree(adev->fru_info);
	adev->fru_info = NULL;

	px = amdgpu_device_supports_px(adev_to_drm(adev));

	/* Mirror of the registration condition in device init: PX, or a
	 * non-removable device with an apple-gmux mux present.
	 */
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

	if (px)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(adev->pdev);

	/* Only touch MMIO if the device has not been unplugged; on unplug
	 * these mappings were already torn down in amdgpu_device_fini_hw().
	 */
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		iounmap(adev->rmmio);
		adev->rmmio = NULL;
		amdgpu_doorbell_fini(adev);
		drm_dev_exit(idx);
	}

	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (adev->mman.discovery_bin)
		amdgpu_discovery_fini(adev);

	amdgpu_reset_put_reset_domain(adev->reset_domain);
	adev->reset_domain = NULL;

	kfree(adev->pci_state);

}
4494 
4495 /**
4496  * amdgpu_device_evict_resources - evict device resources
4497  * @adev: amdgpu device object
4498  *
4499  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4500  * of the vram memory type. Mainly used for evicting device resources
4501  * at suspend time.
4502  *
4503  */
4504 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4505 {
4506 	int ret;
4507 
4508 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4509 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4510 		return 0;
4511 
4512 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4513 	if (ret)
4514 		DRM_WARN("evicting device resources failed\n");
4515 	return ret;
4516 }
4517 
4518 /*
4519  * Suspend & resume.
4520  */
4521 /**
4522  * amdgpu_device_prepare - prepare for device suspend
4523  *
4524  * @dev: drm dev pointer
4525  *
4526  * Prepare to put the hw in the suspend state (all asics).
4527  * Returns 0 for success or an error on failure.
4528  * Called at driver suspend.
4529  */
4530 int amdgpu_device_prepare(struct drm_device *dev)
4531 {
4532 	struct amdgpu_device *adev = drm_to_adev(dev);
4533 	int i, r;
4534 
4535 	amdgpu_choose_low_power_state(adev);
4536 
4537 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4538 		return 0;
4539 
4540 	/* Evict the majority of BOs before starting suspend sequence */
4541 	r = amdgpu_device_evict_resources(adev);
4542 	if (r)
4543 		goto unprepare;
4544 
4545 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4546 
4547 	for (i = 0; i < adev->num_ip_blocks; i++) {
4548 		if (!adev->ip_blocks[i].status.valid)
4549 			continue;
4550 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4551 			continue;
4552 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4553 		if (r)
4554 			goto unprepare;
4555 	}
4556 
4557 	return 0;
4558 
4559 unprepare:
4560 	adev->in_s0ix = adev->in_s3 = false;
4561 
4562 	return r;
4563 }
4564 
4565 /**
4566  * amdgpu_device_suspend - initiate device suspend
4567  *
4568  * @dev: drm dev pointer
4569  * @fbcon : notify the fbdev of suspend
4570  *
4571  * Puts the hw in the suspend state (all asics).
4572  * Returns 0 for success or an error on failure.
4573  * Called at driver suspend.
4574  */
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	adev->in_suspend = true;

	/* SR-IOV: stop host data exchange and get exclusive GPU access
	 * before touching hardware state.
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_fini_data_exchange(adev);
		r = amdgpu_virt_request_full_gpu(adev, false);
		if (r)
			return r;
	}

	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
		DRM_WARN("smart shift update failed\n");

	if (fbcon)
		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);

	/* Make sure no delayed init work races with the suspend sequence */
	cancel_delayed_work_sync(&adev->delayed_init_work);

	amdgpu_ras_suspend(adev);

	amdgpu_device_ip_suspend_phase1(adev);

	/* KFD is left running for s0ix (see matching skip in resume) */
	if (!adev->in_s0ix)
		amdgpu_amdkfd_suspend(adev, adev->in_runpm);

	/* Evict VRAM contents before the device powers down */
	r = amdgpu_device_evict_resources(adev);
	if (r)
		return r;

	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	amdgpu_fence_driver_hw_fini(adev);

	amdgpu_device_ip_suspend_phase2(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	r = amdgpu_dpm_notify_rlc_state(adev, false);
	if (r)
		return r;

	return 0;
}
4626 
4627 /**
4628  * amdgpu_device_resume - initiate device resume
4629  *
4630  * @dev: drm dev pointer
4631  * @fbcon : notify the fbdev of resume
4632  *
4633  * Bring the hw back to operating state (all asics).
4634  * Returns 0 for success or an error on failure.
4635  * Called at driver resume.
4636  */
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	/* SR-IOV: get exclusive GPU access before touching hardware */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	/* Leaving s0ix: signal D0 entry to the DPM code */
	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_device_asic_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);

	if (r) {
		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
		goto exit;
	}
	amdgpu_fence_driver_hw_init(adev);

	/* KFD was not suspended for s0ix, so only resume it otherwise
	 * (matches the skip in amdgpu_device_suspend()).
	 */
	if (!adev->in_s0ix) {
		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
		if (r)
			goto exit;
	}

	r = amdgpu_device_ip_late_init(adev);
	if (r)
		goto exit;

	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));
exit:
	/* Release the GPU back to the host whether or not resume succeeded */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);
	}

	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	if (fbcon)
		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);

	amdgpu_ras_resume(adev);

	if (adev->mode_info.num_crtc) {
		/*
		 * Most of the connector probing functions try to acquire runtime pm
		 * refs to ensure that the GPU is powered on when connector polling is
		 * performed. Since we're calling this from a runtime PM callback,
		 * trying to acquire rpm refs will cause us to deadlock.
		 *
		 * Since we're guaranteed to be holding the rpm lock, it's safe to
		 * temporarily disable the rpm helpers so this doesn't deadlock us.
		 */
#ifdef CONFIG_PM
		dev->dev->power.disable_depth++;
#endif
		/* Re-probe outputs; the event path differs with/without DC */
		if (!adev->dc_enabled)
			drm_helper_hpd_irq_event(dev);
		else
			drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
		dev->dev->power.disable_depth--;
#endif
	}
	adev->in_suspend = false;

	if (adev->enable_mes)
		amdgpu_mes_self_test(adev);

	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
		DRM_WARN("smart shift update failed\n");

	return 0;
}
4729 
4730 /**
4731  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4732  *
4733  * @adev: amdgpu_device pointer
4734  *
4735  * The list of all the hardware IPs that make up the asic is walked and
4736  * the check_soft_reset callbacks are run.  check_soft_reset determines
4737  * if the asic is still hung or not.
4738  * Returns true if any of the IPs are still in a hung state, false if not.
4739  */
4740 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4741 {
4742 	int i;
4743 	bool asic_hang = false;
4744 
4745 	if (amdgpu_sriov_vf(adev))
4746 		return true;
4747 
4748 	if (amdgpu_asic_need_full_reset(adev))
4749 		return true;
4750 
4751 	for (i = 0; i < adev->num_ip_blocks; i++) {
4752 		if (!adev->ip_blocks[i].status.valid)
4753 			continue;
4754 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4755 			adev->ip_blocks[i].status.hang =
4756 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4757 		if (adev->ip_blocks[i].status.hang) {
4758 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4759 			asic_hang = true;
4760 		}
4761 	}
4762 	return asic_hang;
4763 }
4764 
4765 /**
4766  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4767  *
4768  * @adev: amdgpu_device pointer
4769  *
4770  * The list of all the hardware IPs that make up the asic is walked and the
4771  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4772  * handles any IP specific hardware or software state changes that are
4773  * necessary for a soft reset to succeed.
4774  * Returns 0 on success, negative error code on failure.
4775  */
4776 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4777 {
4778 	int i, r = 0;
4779 
4780 	for (i = 0; i < adev->num_ip_blocks; i++) {
4781 		if (!adev->ip_blocks[i].status.valid)
4782 			continue;
4783 		if (adev->ip_blocks[i].status.hang &&
4784 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4785 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4786 			if (r)
4787 				return r;
4788 		}
4789 	}
4790 
4791 	return 0;
4792 }
4793 
4794 /**
4795  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4796  *
4797  * @adev: amdgpu_device pointer
4798  *
4799  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4800  * reset is necessary to recover.
4801  * Returns true if a full asic reset is required, false if not.
4802  */
4803 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4804 {
4805 	int i;
4806 
4807 	if (amdgpu_asic_need_full_reset(adev))
4808 		return true;
4809 
4810 	for (i = 0; i < adev->num_ip_blocks; i++) {
4811 		if (!adev->ip_blocks[i].status.valid)
4812 			continue;
4813 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4814 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4815 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4816 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4817 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4818 			if (adev->ip_blocks[i].status.hang) {
4819 				dev_info(adev->dev, "Some block need full reset!\n");
4820 				return true;
4821 			}
4822 		}
4823 	}
4824 	return false;
4825 }
4826 
4827 /**
4828  * amdgpu_device_ip_soft_reset - do a soft reset
4829  *
4830  * @adev: amdgpu_device pointer
4831  *
4832  * The list of all the hardware IPs that make up the asic is walked and the
4833  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4834  * IP specific hardware or software state changes that are necessary to soft
4835  * reset the IP.
4836  * Returns 0 on success, negative error code on failure.
4837  */
4838 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4839 {
4840 	int i, r = 0;
4841 
4842 	for (i = 0; i < adev->num_ip_blocks; i++) {
4843 		if (!adev->ip_blocks[i].status.valid)
4844 			continue;
4845 		if (adev->ip_blocks[i].status.hang &&
4846 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4847 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4848 			if (r)
4849 				return r;
4850 		}
4851 	}
4852 
4853 	return 0;
4854 }
4855 
4856 /**
4857  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4858  *
4859  * @adev: amdgpu_device pointer
4860  *
4861  * The list of all the hardware IPs that make up the asic is walked and the
4862  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4863  * handles any IP specific hardware or software state changes that are
4864  * necessary after the IP has been soft reset.
4865  * Returns 0 on success, negative error code on failure.
4866  */
4867 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4868 {
4869 	int i, r = 0;
4870 
4871 	for (i = 0; i < adev->num_ip_blocks; i++) {
4872 		if (!adev->ip_blocks[i].status.valid)
4873 			continue;
4874 		if (adev->ip_blocks[i].status.hang &&
4875 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4876 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4877 		if (r)
4878 			return r;
4879 	}
4880 
4881 	return 0;
4882 }
4883 
4884 /**
4885  * amdgpu_device_recover_vram - Recover some VRAM contents
4886  *
4887  * @adev: amdgpu_device pointer
4888  *
4889  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4890  * restore things like GPUVM page tables after a GPU reset where
4891  * the contents of VRAM might be lost.
4892  *
4893  * Returns:
4894  * 0 on success, negative error code on failure.
4895  */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
	struct dma_fence *fence = NULL, *next = NULL;
	struct amdgpu_bo *shadow;
	struct amdgpu_bo_vm *vmbo;
	long r = 1, tmo;

	/* SR-IOV runtime gets a much longer per-wait budget (8s vs 100ms) */
	if (amdgpu_sriov_runtime(adev))
		tmo = msecs_to_jiffies(8000);
	else
		tmo = msecs_to_jiffies(100);

	dev_info(adev->dev, "recover vram bo from shadow start\n");
	mutex_lock(&adev->shadow_list_lock);
	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
		/* If vm is compute context or adev is APU, shadow will be NULL */
		if (!vmbo->shadow)
			continue;
		shadow = vmbo->shadow;

		/* No need to recover an evicted BO */
		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
			continue;

		/* Submit the GTT->VRAM copy; completion is signalled by 'next' */
		r = amdgpu_bo_restore_shadow(shadow, &next);
		if (r)
			break;

		/* Pipelined wait: while the copy just submitted runs, wait on
		 * the fence of the previous one.  tmo carries the remaining
		 * wait budget forward across iterations.
		 */
		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);
			fence = next;
			if (tmo == 0) {
				r = -ETIMEDOUT;
				break;
			} else if (tmo < 0) {
				r = tmo;
				break;
			}
		} else {
			fence = next;
		}
	}
	mutex_unlock(&adev->shadow_list_lock);

	/* Wait for the last submitted restore to finish */
	if (fence)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);

	if (r < 0 || tmo <= 0) {
		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
		return -EIO;
	}

	dev_info(adev->dev, "recover vram bo from shadow done\n");
	return 0;
}
4955 
4956 
4957 /**
4958  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4959  *
4960  * @adev: amdgpu_device pointer
4961  * @from_hypervisor: request from hypervisor
4962  *
4963  * do VF FLR and reinitialize Asic
4964  * return 0 means succeeded otherwise failed
4965  */
4966 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4967 				     bool from_hypervisor)
4968 {
4969 	int r;
4970 	struct amdgpu_hive_info *hive = NULL;
4971 	int retry_limit = 0;
4972 
4973 retry:
4974 	amdgpu_amdkfd_pre_reset(adev);
4975 
4976 	amdgpu_device_stop_pending_resets(adev);
4977 
4978 	if (from_hypervisor)
4979 		r = amdgpu_virt_request_full_gpu(adev, true);
4980 	else
4981 		r = amdgpu_virt_reset_gpu(adev);
4982 	if (r)
4983 		return r;
4984 	amdgpu_irq_gpu_reset_resume_helper(adev);
4985 
4986 	/* some sw clean up VF needs to do before recover */
4987 	amdgpu_virt_post_reset(adev);
4988 
4989 	/* Resume IP prior to SMC */
4990 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4991 	if (r)
4992 		goto error;
4993 
4994 	amdgpu_virt_init_data_exchange(adev);
4995 
4996 	r = amdgpu_device_fw_loading(adev);
4997 	if (r)
4998 		return r;
4999 
5000 	/* now we are okay to resume SMC/CP/SDMA */
5001 	r = amdgpu_device_ip_reinit_late_sriov(adev);
5002 	if (r)
5003 		goto error;
5004 
5005 	hive = amdgpu_get_xgmi_hive(adev);
5006 	/* Update PSP FW topology after reset */
5007 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5008 		r = amdgpu_xgmi_update_topology(hive, adev);
5009 
5010 	if (hive)
5011 		amdgpu_put_xgmi_hive(hive);
5012 
5013 	if (!r) {
5014 		r = amdgpu_ib_ring_tests(adev);
5015 
5016 		amdgpu_amdkfd_post_reset(adev);
5017 	}
5018 
5019 error:
5020 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5021 		amdgpu_inc_vram_lost(adev);
5022 		r = amdgpu_device_recover_vram(adev);
5023 	}
5024 	amdgpu_virt_release_full_gpu(adev, true);
5025 
5026 	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5027 		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5028 			retry_limit++;
5029 			goto retry;
5030 		} else
5031 			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5032 	}
5033 
5034 	return r;
5035 }
5036 
5037 /**
5038  * amdgpu_device_has_job_running - check if there is any job in mirror list
5039  *
5040  * @adev: amdgpu_device pointer
5041  *
5042  * check if there is any job in mirror list
5043  */
5044 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5045 {
5046 	int i;
5047 	struct drm_sched_job *job;
5048 
5049 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5050 		struct amdgpu_ring *ring = adev->rings[i];
5051 
5052 		if (!amdgpu_ring_sched_ready(ring))
5053 			continue;
5054 
5055 		spin_lock(&ring->sched.job_list_lock);
5056 		job = list_first_entry_or_null(&ring->sched.pending_list,
5057 					       struct drm_sched_job, list);
5058 		spin_unlock(&ring->sched.job_list_lock);
5059 		if (job)
5060 			return true;
5061 	}
5062 	return false;
5063 }
5064 
5065 /**
5066  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5067  *
5068  * @adev: amdgpu_device pointer
5069  *
5070  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5071  * a hung GPU.
5072  */
5073 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5074 {
5075 
5076 	if (amdgpu_gpu_recovery == 0)
5077 		goto disabled;
5078 
5079 	/* Skip soft reset check in fatal error mode */
5080 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5081 		return true;
5082 
5083 	if (amdgpu_sriov_vf(adev))
5084 		return true;
5085 
5086 	if (amdgpu_gpu_recovery == -1) {
5087 		switch (adev->asic_type) {
5088 #ifdef CONFIG_DRM_AMDGPU_SI
5089 		case CHIP_VERDE:
5090 		case CHIP_TAHITI:
5091 		case CHIP_PITCAIRN:
5092 		case CHIP_OLAND:
5093 		case CHIP_HAINAN:
5094 #endif
5095 #ifdef CONFIG_DRM_AMDGPU_CIK
5096 		case CHIP_KAVERI:
5097 		case CHIP_KABINI:
5098 		case CHIP_MULLINS:
5099 #endif
5100 		case CHIP_CARRIZO:
5101 		case CHIP_STONEY:
5102 		case CHIP_CYAN_SKILLFISH:
5103 			goto disabled;
5104 		default:
5105 			break;
5106 		}
5107 	}
5108 
5109 	return true;
5110 
5111 disabled:
5112 		dev_info(adev->dev, "GPU recovery disabled.\n");
5113 		return false;
5114 }
5115 
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
{
	u32 i;
	int ret = 0;

	/* Flag the engine as hung in the VBIOS scratch registers before
	 * touching the HW.
	 */
	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

	dev_info(adev->dev, "GPU mode1 reset\n");

	/* disable BM */
	pci_clear_master(adev->pdev);

	/* PCI config space does not survive a mode1 reset; cache it so it
	 * can be restored below.
	 */
	amdgpu_device_cache_pci_state(adev->pdev);

	/* Prefer the SMU-driven mode1 reset when DPM supports it, otherwise
	 * fall back to a PSP-driven reset.
	 */
	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
		dev_info(adev->dev, "GPU smu mode1 reset\n");
		ret = amdgpu_dpm_mode1_reset(adev);
	} else {
		dev_info(adev->dev, "GPU psp mode1 reset\n");
		ret = psp_gpu_reset(adev);
	}

	if (ret)
		goto mode1_reset_failed;

	amdgpu_device_load_pci_state(adev->pdev);
	ret = amdgpu_psp_wait_for_bootloader(adev);
	if (ret)
		goto mode1_reset_failed;

	/* wait for asic to come out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		u32 memsize = adev->nbio.funcs->get_memsize(adev);

		/* memsize reads back all-ones while the ASIC is still in reset */
		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		ret = -ETIMEDOUT;
		goto mode1_reset_failed;
	}

	/* Reset completed: clear the hung flag set at entry */
	amdgpu_atombios_scratch_regs_engine_hung(adev, false);

	return 0;

mode1_reset_failed:
	dev_err(adev->dev, "GPU mode1 reset failed\n");
	return ret;
}
5168 
/**
 * amdgpu_device_pre_asic_reset - prepare one device for an ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Quiesces the fence driver and clears pending HW fences, bumps the karma of
 * the guilty job, and either performs a soft reset or suspends the IP blocks
 * in preparation for a full reset.  Updates AMDGPU_NEED_FULL_RESET in
 * @reset_context->flags accordingly.
 *
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
				 struct amdgpu_reset_context *reset_context)
{
	int i, r = 0;
	struct amdgpu_job *job = NULL;
	bool need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/* Only the device that triggered the reset carries the guilty job */
	if (reset_context->reset_req_dev == adev)
		job = reset_context->job;

	if (amdgpu_sriov_vf(adev)) {
		/* stop the data exchange thread */
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* Mask fence interrupts while fences are being cleared below */
	amdgpu_fence_driver_isr_toggle(adev, true);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!amdgpu_ring_sched_ready(ring))
			continue;

		/* Clear job fence from fence drv to avoid force_completion
		 * leave NULL and vm flush fence in fence drv
		 */
		amdgpu_fence_driver_clear_job_fences(ring);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	amdgpu_fence_driver_isr_toggle(adev, false);

	/* Punish the offending context so repeated hangs get it banned */
	if (job && job->vm)
		drm_sched_increase_karma(&job->base);

	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -EOPNOTSUPP)
		r = 0;
	else
		return r;

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		/* Try the cheaper per-IP soft reset first; escalate to a full
		 * reset if it fails or leaves an IP block still hung.
		 */
		if (!need_full_reset && amdgpu_gpu_recovery &&
		    amdgpu_device_ip_check_soft_reset(adev)) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);
		if (need_full_reset)
			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
		else
			clear_bit(AMDGPU_NEED_FULL_RESET,
				  &reset_context->flags);
	}

	return r;
}
5243 
5244 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5245 {
5246 	int i;
5247 
5248 	lockdep_assert_held(&adev->reset_domain->sem);
5249 
5250 	for (i = 0; i < adev->reset_info.num_regs; i++) {
5251 		adev->reset_info.reset_dump_reg_value[i] =
5252 			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5253 
5254 		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5255 					     adev->reset_info.reset_dump_reg_value[i]);
5256 	}
5257 
5258 	return 0;
5259 }
5260 
/**
 * amdgpu_do_asic_reset - reset and re-init every device in the list
 *
 * @device_list_handle: list of devices (reset_list linkage) to reset
 * @reset_context: amdgpu reset context pointer
 *
 * Tries the ASIC-specific reset handler first; if none is implemented, falls
 * back to the generic flow: full ASIC reset (parallel across an XGMI hive),
 * re-post, IP resume phases, VRAM-lost handling, IB ring tests, and shadow
 * VRAM recovery.
 *
 * Returns 0 on success, -EAGAIN when the caller should retry the whole
 * sequence, or another negative error code on failure.
 */
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
			 struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset, skip_hw_reset, vram_lost = false;
	int r = 0;

	/* Try reset handler method first */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_reset_reg_dumps(tmp_adev);

	reset_context->reset_device_list = device_list_handle;
	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -EOPNOTSUPP)
		r = 0;
	else
		return r;

	/* Reset handler not implemented, use the default method */
	need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (!skip_hw_reset && need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				tmp_adev->gmc.xgmi.pending_reset = false;
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
					 r, adev_to_drm(tmp_adev)->unique);
				goto out;
			}
		}

		/* For XGMI wait for all resets to complete before proceed */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	/* RAS interrupt fired before the reset: clear stale error counts now
	 * that the HW has been reset.
	 */
	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
		}

		amdgpu_ras_intr_cleared();
	}

	/* Re-initialize every device that went through a full reset */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		if (need_full_reset) {
			/* post card */
			amdgpu_ras_set_fed(tmp_adev, false);
			r = amdgpu_device_asic_init(tmp_adev);
			if (r) {
				dev_warn(tmp_adev->dev, "asic atom init failed!");
			} else {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");

				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);

				amdgpu_coredump(tmp_adev, vram_lost, reset_context);

				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				/* NOTE(review): this returns directly rather than
				 * going through "out", so asic_reset_res is not set
				 * for this device — confirm intended.
				 */
				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_xcp_restore_partition_mode(
					tmp_adev->xcp_mgr);
				if (r)
					goto out;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked as reset was already
				 * complete successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				if (!reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					amdgpu_xgmi_add_device(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);

				/*
				 * The GPU enters bad state once faulty pages
				 * by ECC has reached the threshold, and ras
				 * recovery is scheduled next. So add one check
				 * here to break recovery if it indeed exceeds
				 * bad page threshold, and remind user to
				 * retire this GPU or setting one bigger
				 * bad_page_threshold value to fix this once
				 * probing driver again.
				 */
				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
					/* must succeed. */
					amdgpu_ras_resume(tmp_adev);
				} else {
					r = -EINVAL;
					goto out;
				}

				/* Update PSP FW topology after reset */
				if (reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(
						reset_context->hive, tmp_adev);
			}
		}

out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		/* Restore VRAM contents from GTT shadows, or record the
		 * per-device failure for the caller to pick up.
		 */
		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	if (need_full_reset)
		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	else
		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	return r;
}
5437 
5438 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5439 {
5440 
5441 	switch (amdgpu_asic_reset_method(adev)) {
5442 	case AMD_RESET_METHOD_MODE1:
5443 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5444 		break;
5445 	case AMD_RESET_METHOD_MODE2:
5446 		adev->mp1_state = PP_MP1_STATE_RESET;
5447 		break;
5448 	default:
5449 		adev->mp1_state = PP_MP1_STATE_NONE;
5450 		break;
5451 	}
5452 }
5453 
/* Flush accumulated VF errors to the host and drop the MP1 state that
 * amdgpu_device_set_mp1_state() installed before the reset.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
5459 
5460 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5461 {
5462 	struct pci_dev *p = NULL;
5463 
5464 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5465 			adev->pdev->bus->number, 1);
5466 	if (p) {
5467 		pm_runtime_enable(&(p->dev));
5468 		pm_runtime_resume(&(p->dev));
5469 	}
5470 
5471 	pci_dev_put(p);
5472 }
5473 
/* Try to runtime-suspend the HDMI/DP audio function (function 1 of the GPU's
 * slot) before a reset, so the audio driver isn't surprised by the HW going
 * away underneath it.  Returns 0 if the audio device ended up suspended and
 * runtime PM was disabled on it (caller must re-enable via
 * amdgpu_device_resume_display_audio); negative error otherwise.
 */
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue without proper suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	     (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Considering 3S is
		 * the audio controller default autosuspend delay setting.
		 * 4S used here is guaranteed to cover that.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	/* Poll until the audio device reports suspended, or the deadline
	 * computed above passes.  pm_runtime_suspend() returning 0 means the
	 * suspend was carried out, so we can stop polling.
	 */
	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	/* Keep it suspended across the reset */
	pm_runtime_disable(&(p->dev));

	pci_dev_put(p);
	return 0;
}
5521 
5522 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5523 {
5524 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5525 
5526 #if defined(CONFIG_DEBUG_FS)
5527 	if (!amdgpu_sriov_vf(adev))
5528 		cancel_work(&adev->reset_work);
5529 #endif
5530 
5531 	if (adev->kfd.dev)
5532 		cancel_work(&adev->kfd.reset_work);
5533 
5534 	if (amdgpu_sriov_vf(adev))
5535 		cancel_work(&adev->virt.flr_work);
5536 
5537 	if (con && adev->ras_enabled)
5538 		cancel_work(&con->recovery_work);
5539 
5540 }
5541 
5542 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5543 {
5544 	struct amdgpu_device *tmp_adev;
5545 	int ret = 0;
5546 	u32 status;
5547 
5548 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5549 		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5550 		if (PCI_POSSIBLE_ERROR(status)) {
5551 			dev_err(tmp_adev->dev, "device lost from bus!");
5552 			ret = -ENODEV;
5553 		}
5554 	}
5555 
5556 	return ret;
5557 }
5558 
5559 /**
5560  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5561  *
5562  * @adev: amdgpu_device pointer
5563  * @job: which job trigger hang
5564  * @reset_context: amdgpu reset context pointer
5565  *
5566  * Attempt to reset the GPU if it has hung (all asics).
5567  * Attempt to do soft-reset or full-reset and reinitialize Asic
5568  * Returns 0 for success or an error on failure.
5569  */
5570 
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list, *device_list_handle =  NULL;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool need_emergency_restart = false;
	bool audio_suspended = false;

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!\n",
		need_emergency_restart ? "jobs stop":"reset");

	/* Serialize against other resets on the same XGMI hive */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	INIT_LIST_HEAD(&device_list);
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			list_add_tail(&tmp_adev->reset_list, &device_list);
			/* Propagate shutdown state to the whole hive */
			if (adev->shutdown)
				tmp_adev->shutdown = true;
		}
		if (!list_is_first(&adev->reset_list, &device_list))
			list_rotate_to_front(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	} else {
		list_add_tail(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	}

	/* Bail out early if any device has already dropped off the bus */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(device_list_handle);
		if (r)
			goto end_reset;
	}

	/* We need to lock reset domain only once both for XGMI and single device */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * Due to the power domain of the graphics device
		 * is shared with AZ power domain. Without this,
		 * we may change the audio hardware from behind
		 * the audio driver's back. That will trigger
		 * some audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		if (!amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_pre_reset(tmp_adev);

		/*
		 * Mark these ASICs to be reset as untracked first
		 * And add them back after reset completed
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);

		/* disable ras on ALL IPs */
		if (!need_emergency_restart &&
		      amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}

	if (need_emergency_restart)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/*TODO Should we stop ?*/
		if (r) {
			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, adev_to_drm(tmp_adev)->unique);
			tmp_adev->asic_reset_res = r;
		}

		if (!amdgpu_sriov_vf(tmp_adev))
			/*
			* Drop all pending non scheduler resets. Scheduler resets
			* were already dropped during drm_sched_stop
			*/
			amdgpu_device_stop_pending_resets(tmp_adev);
	}

	/* Actual ASIC resets if needed.*/
	/* Host driver will handle XGMI hive reset for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;

		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
			    IP_VERSION(9, 4, 2) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
			amdgpu_ras_resume(adev);
	} else {
		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		/* Restart the schedulers that were stopped above */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_start(&ring->sched, true);
		}

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		/* Surface any per-device failure recorded during the reset */
		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
				DRM_WARN("smart shift update failed\n");
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);

		/* kfd_post_reset will do nothing if kfd device is not initialized,
		 * need to bring up kfd here if it's not be initialized before
		 */
		/* NOTE(review): the two statements below use adev, not tmp_adev,
		 * inside the per-device loop — confirm this is intended for
		 * XGMI hives with more than one device.
		 */
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);

		if (audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);

		amdgpu_device_unset_mp1_state(tmp_adev);

		amdgpu_ras_set_error_query_ready(tmp_adev, true);
	}

	/* Release the reset domain taken before the pre-reset loop */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
					    reset_list);
	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);

end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	atomic_set(&adev->reset_domain->reset_res, r);
	return r;
}
5814 
5815 /**
5816  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5817  *
5818  * @adev: amdgpu_device pointer
5819  * @speed: pointer to the speed of the link
5820  * @width: pointer to the width of the link
5821  *
5822  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5823  * first physical partner to an AMD dGPU.
5824  * This will exclude any virtual switches and links.
5825  */
5826 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5827 					    enum pci_bus_speed *speed,
5828 					    enum pcie_link_width *width)
5829 {
5830 	struct pci_dev *parent = adev->pdev;
5831 
5832 	if (!speed || !width)
5833 		return;
5834 
5835 	*speed = PCI_SPEED_UNKNOWN;
5836 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5837 
5838 	while ((parent = pci_upstream_bridge(parent))) {
5839 		/* skip upstream/downstream switches internal to dGPU*/
5840 		if (parent->vendor == PCI_VENDOR_ID_ATI)
5841 			continue;
5842 		*speed = pcie_get_speed_cap(parent);
5843 		*width = pcie_get_width_cap(parent);
5844 		break;
5845 	}
5846 }
5847 
5848 /**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
5854  * and lanes) of the slot the device is in. Handles APUs and
5855  * virtualized environments where PCIE config space may not be available.
5856  */
5857 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5858 {
5859 	struct pci_dev *pdev;
5860 	enum pci_bus_speed speed_cap, platform_speed_cap;
5861 	enum pcie_link_width platform_link_width;
5862 
5863 	if (amdgpu_pcie_gen_cap)
5864 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5865 
5866 	if (amdgpu_pcie_lane_cap)
5867 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5868 
5869 	/* covers APUs as well */
5870 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5871 		if (adev->pm.pcie_gen_mask == 0)
5872 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5873 		if (adev->pm.pcie_mlw_mask == 0)
5874 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5875 		return;
5876 	}
5877 
5878 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5879 		return;
5880 
5881 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5882 					&platform_link_width);
5883 
5884 	if (adev->pm.pcie_gen_mask == 0) {
5885 		/* asic caps */
5886 		pdev = adev->pdev;
5887 		speed_cap = pcie_get_speed_cap(pdev);
5888 		if (speed_cap == PCI_SPEED_UNKNOWN) {
5889 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5890 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5891 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5892 		} else {
5893 			if (speed_cap == PCIE_SPEED_32_0GT)
5894 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5895 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5896 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5897 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5898 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5899 			else if (speed_cap == PCIE_SPEED_16_0GT)
5900 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5901 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5902 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5903 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5904 			else if (speed_cap == PCIE_SPEED_8_0GT)
5905 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5906 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5907 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5908 			else if (speed_cap == PCIE_SPEED_5_0GT)
5909 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5910 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5911 			else
5912 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5913 		}
5914 		/* platform caps */
5915 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5916 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5917 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5918 		} else {
5919 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
5920 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5921 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5922 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5923 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5924 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5925 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5926 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5927 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5928 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5929 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5930 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5931 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5932 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5933 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5934 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5935 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5936 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5937 			else
5938 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5939 
5940 		}
5941 	}
5942 	if (adev->pm.pcie_mlw_mask == 0) {
5943 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5944 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5945 		} else {
5946 			switch (platform_link_width) {
5947 			case PCIE_LNK_X32:
5948 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5949 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5950 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5951 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5952 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5953 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5954 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5955 				break;
5956 			case PCIE_LNK_X16:
5957 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5958 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5959 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5960 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5961 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5962 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5963 				break;
5964 			case PCIE_LNK_X12:
5965 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5966 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5967 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5968 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5969 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5970 				break;
5971 			case PCIE_LNK_X8:
5972 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5973 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5974 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5975 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5976 				break;
5977 			case PCIE_LNK_X4:
5978 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5979 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5980 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5981 				break;
5982 			case PCIE_LNK_X2:
5983 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5984 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5985 				break;
5986 			case PCIE_LNK_X1:
5987 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5988 				break;
5989 			default:
5990 				break;
5991 			}
5992 		}
5993 	}
5994 }
5995 
5996 /**
5997  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5998  *
5999  * @adev: amdgpu_device pointer
6000  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6001  *
6002  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6003  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6004  * @peer_adev.
6005  */
6006 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6007 				      struct amdgpu_device *peer_adev)
6008 {
6009 #ifdef CONFIG_HSA_AMD_P2P
6010 	uint64_t address_mask = peer_adev->dev->dma_mask ?
6011 		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6012 	resource_size_t aper_limit =
6013 		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6014 	bool p2p_access =
6015 		!adev->gmc.xgmi.connected_to_cpu &&
6016 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6017 
6018 	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6019 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6020 		!(adev->gmc.aper_base & address_mask ||
6021 		  aper_limit & address_mask));
6022 #else
6023 	return false;
6024 #endif
6025 }
6026 
6027 int amdgpu_device_baco_enter(struct drm_device *dev)
6028 {
6029 	struct amdgpu_device *adev = drm_to_adev(dev);
6030 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6031 
6032 	if (!amdgpu_device_supports_baco(dev))
6033 		return -ENOTSUPP;
6034 
6035 	if (ras && adev->ras_enabled &&
6036 	    adev->nbio.funcs->enable_doorbell_interrupt)
6037 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6038 
6039 	return amdgpu_dpm_baco_enter(adev);
6040 }
6041 
6042 int amdgpu_device_baco_exit(struct drm_device *dev)
6043 {
6044 	struct amdgpu_device *adev = drm_to_adev(dev);
6045 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6046 	int ret = 0;
6047 
6048 	if (!amdgpu_device_supports_baco(dev))
6049 		return -ENOTSUPP;
6050 
6051 	ret = amdgpu_dpm_baco_exit(adev);
6052 	if (ret)
6053 		return ret;
6054 
6055 	if (ras && adev->ras_enabled &&
6056 	    adev->nbio.funcs->enable_doorbell_interrupt)
6057 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6058 
6059 	if (amdgpu_passthrough(adev) &&
6060 	    adev->nbio.funcs->clear_doorbell_interrupt)
6061 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6062 
6063 	return 0;
6064 }
6065 
6066 /**
6067  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6068  * @pdev: PCI device struct
6069  * @state: PCI channel state
6070  *
6071  * Description: Called when a PCI error is detected.
6072  *
6073  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6074  */
6075 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6076 {
6077 	struct drm_device *dev = pci_get_drvdata(pdev);
6078 	struct amdgpu_device *adev = drm_to_adev(dev);
6079 	int i;
6080 
6081 	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6082 
6083 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6084 		DRM_WARN("No support for XGMI hive yet...");
6085 		return PCI_ERS_RESULT_DISCONNECT;
6086 	}
6087 
6088 	adev->pci_channel_state = state;
6089 
6090 	switch (state) {
6091 	case pci_channel_io_normal:
6092 		return PCI_ERS_RESULT_CAN_RECOVER;
6093 	/* Fatal error, prepare for slot reset */
6094 	case pci_channel_io_frozen:
6095 		/*
6096 		 * Locking adev->reset_domain->sem will prevent any external access
6097 		 * to GPU during PCI error recovery
6098 		 */
6099 		amdgpu_device_lock_reset_domain(adev->reset_domain);
6100 		amdgpu_device_set_mp1_state(adev);
6101 
6102 		/*
6103 		 * Block any work scheduling as we do for regular GPU reset
6104 		 * for the duration of the recovery
6105 		 */
6106 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6107 			struct amdgpu_ring *ring = adev->rings[i];
6108 
6109 			if (!amdgpu_ring_sched_ready(ring))
6110 				continue;
6111 
6112 			drm_sched_stop(&ring->sched, NULL);
6113 		}
6114 		atomic_inc(&adev->gpu_reset_counter);
6115 		return PCI_ERS_RESULT_NEED_RESET;
6116 	case pci_channel_io_perm_failure:
6117 		/* Permanent error, prepare for device removal */
6118 		return PCI_ERS_RESULT_DISCONNECT;
6119 	}
6120 
6121 	return PCI_ERS_RESULT_NEED_RESET;
6122 }
6123 
6124 /**
6125  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6126  * @pdev: pointer to PCI device
6127  */
6128 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6129 {
6130 
6131 	DRM_INFO("PCI error: mmio enabled callback!!\n");
6132 
6133 	/* TODO - dump whatever for debugging purposes */
6134 
6135 	/* This called only if amdgpu_pci_error_detected returns
6136 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6137 	 * works, no need to reset slot.
6138 	 */
6139 
6140 	return PCI_ERS_RESULT_RECOVERED;
6141 }
6142 
6143 /**
6144  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6145  * @pdev: PCI device struct
6146  *
6147  * Description: This routine is called by the pci error recovery
6148  * code after the PCI slot has been reset, just before we
6149  * should resume normal operations.
6150  */
6151 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6152 {
6153 	struct drm_device *dev = pci_get_drvdata(pdev);
6154 	struct amdgpu_device *adev = drm_to_adev(dev);
6155 	int r, i;
6156 	struct amdgpu_reset_context reset_context;
6157 	u32 memsize;
6158 	struct list_head device_list;
6159 	struct amdgpu_hive_info *hive;
6160 	int hive_ras_recovery = 0;
6161 	struct amdgpu_ras *ras;
6162 
6163 	/* PCI error slot reset should be skipped During RAS recovery */
6164 	hive = amdgpu_get_xgmi_hive(adev);
6165 	if (hive) {
6166 		hive_ras_recovery = atomic_read(&hive->ras_recovery);
6167 		amdgpu_put_xgmi_hive(hive);
6168 	}
6169 	ras = amdgpu_ras_get_context(adev);
6170 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6171 		 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6172 		return PCI_ERS_RESULT_RECOVERED;
6173 
6174 	DRM_INFO("PCI error: slot reset callback!!\n");
6175 
6176 	memset(&reset_context, 0, sizeof(reset_context));
6177 
6178 	INIT_LIST_HEAD(&device_list);
6179 	list_add_tail(&adev->reset_list, &device_list);
6180 
6181 	/* wait for asic to come out of reset */
6182 	msleep(500);
6183 
6184 	/* Restore PCI confspace */
6185 	amdgpu_device_load_pci_state(pdev);
6186 
6187 	/* confirm  ASIC came out of reset */
6188 	for (i = 0; i < adev->usec_timeout; i++) {
6189 		memsize = amdgpu_asic_get_config_memsize(adev);
6190 
6191 		if (memsize != 0xffffffff)
6192 			break;
6193 		udelay(1);
6194 	}
6195 	if (memsize == 0xffffffff) {
6196 		r = -ETIME;
6197 		goto out;
6198 	}
6199 
6200 	reset_context.method = AMD_RESET_METHOD_NONE;
6201 	reset_context.reset_req_dev = adev;
6202 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6203 	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6204 
6205 	adev->no_hw_access = true;
6206 	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6207 	adev->no_hw_access = false;
6208 	if (r)
6209 		goto out;
6210 
6211 	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6212 
6213 out:
6214 	if (!r) {
6215 		if (amdgpu_device_cache_pci_state(adev->pdev))
6216 			pci_restore_state(adev->pdev);
6217 
6218 		DRM_INFO("PCIe error recovery succeeded\n");
6219 	} else {
6220 		DRM_ERROR("PCIe error recovery failed, err:%d", r);
6221 		amdgpu_device_unset_mp1_state(adev);
6222 		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6223 	}
6224 
6225 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6226 }
6227 
6228 /**
6229  * amdgpu_pci_resume() - resume normal ops after PCI reset
6230  * @pdev: pointer to PCI device
6231  *
6232  * Called when the error recovery driver tells us that its
6233  * OK to resume normal operation.
6234  */
6235 void amdgpu_pci_resume(struct pci_dev *pdev)
6236 {
6237 	struct drm_device *dev = pci_get_drvdata(pdev);
6238 	struct amdgpu_device *adev = drm_to_adev(dev);
6239 	int i;
6240 
6241 
6242 	DRM_INFO("PCI error: resume callback!!\n");
6243 
6244 	/* Only continue execution for the case of pci_channel_io_frozen */
6245 	if (adev->pci_channel_state != pci_channel_io_frozen)
6246 		return;
6247 
6248 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6249 		struct amdgpu_ring *ring = adev->rings[i];
6250 
6251 		if (!amdgpu_ring_sched_ready(ring))
6252 			continue;
6253 
6254 		drm_sched_start(&ring->sched, true);
6255 	}
6256 
6257 	amdgpu_device_unset_mp1_state(adev);
6258 	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6259 }
6260 
6261 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6262 {
6263 	struct drm_device *dev = pci_get_drvdata(pdev);
6264 	struct amdgpu_device *adev = drm_to_adev(dev);
6265 	int r;
6266 
6267 	r = pci_save_state(pdev);
6268 	if (!r) {
6269 		kfree(adev->pci_state);
6270 
6271 		adev->pci_state = pci_store_saved_state(pdev);
6272 
6273 		if (!adev->pci_state) {
6274 			DRM_ERROR("Failed to store PCI saved state");
6275 			return false;
6276 		}
6277 	} else {
6278 		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6279 		return false;
6280 	}
6281 
6282 	return true;
6283 }
6284 
6285 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6286 {
6287 	struct drm_device *dev = pci_get_drvdata(pdev);
6288 	struct amdgpu_device *adev = drm_to_adev(dev);
6289 	int r;
6290 
6291 	if (!adev->pci_state)
6292 		return false;
6293 
6294 	r = pci_load_saved_state(pdev, adev->pci_state);
6295 
6296 	if (!r) {
6297 		pci_restore_state(pdev);
6298 	} else {
6299 		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6300 		return false;
6301 	}
6302 
6303 	return true;
6304 }
6305 
6306 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6307 		struct amdgpu_ring *ring)
6308 {
6309 #ifdef CONFIG_X86_64
6310 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6311 		return;
6312 #endif
6313 	if (adev->gmc.xgmi.connected_to_cpu)
6314 		return;
6315 
6316 	if (ring && ring->funcs->emit_hdp_flush)
6317 		amdgpu_ring_emit_hdp_flush(ring);
6318 	else
6319 		amdgpu_asic_flush_hdp(adev, ring);
6320 }
6321 
6322 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6323 		struct amdgpu_ring *ring)
6324 {
6325 #ifdef CONFIG_X86_64
6326 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6327 		return;
6328 #endif
6329 	if (adev->gmc.xgmi.connected_to_cpu)
6330 		return;
6331 
6332 	amdgpu_asic_invalidate_hdp(adev, ring);
6333 }
6334 
/* Returns non-zero while a GPU reset is in progress on this device's
 * reset domain.
 */
int amdgpu_in_reset(struct amdgpu_device *adev)
{
	return atomic_read(&adev->reset_domain->in_gpu_reset);
}
6339 
6340 /**
6341  * amdgpu_device_halt() - bring hardware to some kind of halt state
6342  *
6343  * @adev: amdgpu_device pointer
6344  *
6345  * Bring hardware to some kind of halt state so that no one can touch it
6346  * any more. It will help to maintain error context when error occurred.
6347  * Compare to a simple hang, the system will keep stable at least for SSH
6348  * access. Then it should be trivial to inspect the hardware state and
6349  * see what's going on. Implemented as following:
6350  *
6351  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6352  *    clears all CPU mappings to device, disallows remappings through page faults
6353  * 2. amdgpu_irq_disable_all() disables all interrupts
6354  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6355  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6356  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6357  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6358  *    flush any in flight DMA operations
6359  */
6360 void amdgpu_device_halt(struct amdgpu_device *adev)
6361 {
6362 	struct pci_dev *pdev = adev->pdev;
6363 	struct drm_device *ddev = adev_to_drm(adev);
6364 
6365 	amdgpu_xcp_dev_unplug(adev);
6366 	drm_dev_unplug(ddev);
6367 
6368 	amdgpu_irq_disable_all(adev);
6369 
6370 	amdgpu_fence_driver_hw_fini(adev);
6371 
6372 	adev->no_hw_access = true;
6373 
6374 	amdgpu_device_unmap_mmio(adev);
6375 
6376 	pci_disable_device(pdev);
6377 	pci_wait_for_pending_transaction(pdev);
6378 }
6379 
6380 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6381 				u32 reg)
6382 {
6383 	unsigned long flags, address, data;
6384 	u32 r;
6385 
6386 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6387 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6388 
6389 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6390 	WREG32(address, reg * 4);
6391 	(void)RREG32(address);
6392 	r = RREG32(data);
6393 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6394 	return r;
6395 }
6396 
6397 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6398 				u32 reg, u32 v)
6399 {
6400 	unsigned long flags, address, data;
6401 
6402 	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6403 	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6404 
6405 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6406 	WREG32(address, reg * 4);
6407 	(void)RREG32(address);
6408 	WREG32(data, v);
6409 	(void)RREG32(data);
6410 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6411 }
6412 
6413 /**
6414  * amdgpu_device_switch_gang - switch to a new gang
6415  * @adev: amdgpu_device pointer
6416  * @gang: the gang to switch to
6417  *
6418  * Try to switch to a new gang.
6419  * Returns: NULL if we switched to the new gang or a reference to the current
6420  * gang leader.
6421  */
6422 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6423 					    struct dma_fence *gang)
6424 {
6425 	struct dma_fence *old = NULL;
6426 
6427 	do {
6428 		dma_fence_put(old);
6429 		rcu_read_lock();
6430 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6431 		rcu_read_unlock();
6432 
6433 		if (old == gang)
6434 			break;
6435 
6436 		if (!dma_fence_is_signaled(old))
6437 			return old;
6438 
6439 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6440 			 old, gang) != old);
6441 
6442 	dma_fence_put(old);
6443 	return NULL;
6444 }
6445 
/* Returns true if the ASIC has usable display hardware (DCE/DCN block
 * present and not harvested), false for headless parts.
 */
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		/* No DCE IP discovered, or the display block was harvested. */
		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}
6486 
6487 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6488 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6489 		uint32_t expected_value, uint32_t mask)
6490 {
6491 	uint32_t ret = 0;
6492 	uint32_t old_ = 0;
6493 	uint32_t tmp_ = RREG32(reg_addr);
6494 	uint32_t loop = adev->usec_timeout;
6495 
6496 	while ((tmp_ & (mask)) != (expected_value)) {
6497 		if (old_ != tmp_) {
6498 			loop = adev->usec_timeout;
6499 			old_ = tmp_;
6500 		} else
6501 			udelay(1);
6502 		tmp_ = RREG32(reg_addr);
6503 		loop--;
6504 		if (!loop) {
6505 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6506 				  inst, reg_name, (uint32_t)expected_value,
6507 				  (uint32_t)(tmp_ & (mask)));
6508 			ret = -ETIMEDOUT;
6509 			break;
6510 		}
6511 	}
6512 	return ret;
6513 }
6514