1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/power_supply.h> 29 #include <linux/kthread.h> 30 #include <linux/module.h> 31 #include <linux/console.h> 32 #include <linux/slab.h> 33 34 #include <drm/drm_atomic_helper.h> 35 #include <drm/drm_probe_helper.h> 36 #include <drm/amdgpu_drm.h> 37 #include <linux/vgaarb.h> 38 #include <linux/vga_switcheroo.h> 39 #include <linux/efi.h> 40 #include "amdgpu.h" 41 #include "amdgpu_trace.h" 42 #include "amdgpu_i2c.h" 43 #include "atom.h" 44 #include "amdgpu_atombios.h" 45 #include "amdgpu_atomfirmware.h" 46 #include "amd_pcie.h" 47 #ifdef CONFIG_DRM_AMDGPU_SI 48 #include "si.h" 49 #endif 50 #ifdef CONFIG_DRM_AMDGPU_CIK 51 #include "cik.h" 52 #endif 53 #include "vi.h" 54 #include "soc15.h" 55 #include "nv.h" 56 #include "bif/bif_4_1_d.h" 57 #include <linux/pci.h> 58 #include <linux/firmware.h> 59 #include "amdgpu_vf_error.h" 60 61 #include "amdgpu_amdkfd.h" 62 #include "amdgpu_pm.h" 63 64 #include "amdgpu_xgmi.h" 65 #include "amdgpu_ras.h" 66 #include "amdgpu_pmu.h" 67 #include "amdgpu_fru_eeprom.h" 68 69 #include <linux/suspend.h> 70 #include <drm/task_barrier.h> 71 #include <linux/pm_runtime.h> 72 73 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 74 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); 75 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 76 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); 77 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); 78 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); 79 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); 80 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); 81 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); 82 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); 83 84 #define AMDGPU_RESUME_MS 2000 85 86 const char *amdgpu_asic_name[] = { 87 "TAHITI", 88 "PITCAIRN", 89 "VERDE", 90 "OLAND", 91 "HAINAN", 92 "BONAIRE", 93 "KAVERI", 94 "KABINI", 95 "HAWAII", 96 "MULLINS", 97 "TOPAZ", 98 "TONGA", 99 "FIJI", 100 "CARRIZO", 101 "STONEY", 102 "POLARIS10", 103 
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	/* sum of NAKs generated and NAKs received, as reported by the ASIC */
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	/* adev->product_name is filled from the FRU EEPROM at init time */
	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	/* adev->product_number is filled from the FRU EEPROM at init time */
	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	/* AMD_IS_PX is set at init time when PX/HG platform support
	 * was detected for this device */
	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	/* defer the answer to the ASIC-specific callback */
	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;


#ifdef CONFIG_64BIT
	/* Fast path: copy directly through the CPU-visible part of the
	 * VRAM BAR aperture where possible. */
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		/* remainder is not CPU visible; fall through to the
		 * MM_INDEX/MM_DATA indirect window below */
		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	/* Slow path: dword-at-a-time access through the MM_INDEX /
	 * MM_INDEX_HI / MM_DATA indirect register window, serialized by
	 * mmio_idx_lock since the window is shared state. */
	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		/* bit 31 of MM_INDEX selects aperture-relative addressing */
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		/* only rewrite the high bits when they change */
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	/* under SR-IOV at runtime, register access must be routed through
	 * the KIQ unless the caller explicitly opted out */
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	/* offsets inside the MMIO aperture are read directly; anything
	 * beyond it goes through the PCIE indirect read callback */
	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 *
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 * BUG()s on an out-of-range offset.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 *
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
363 */ 364 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { 365 if (offset < adev->rmmio_size) 366 writeb(value, adev->rmmio + offset); 367 else 368 BUG(); 369 } 370 371 void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg, 372 uint32_t v, uint32_t acc_flags) 373 { 374 trace_amdgpu_device_wreg(adev->pdev->device, reg, v); 375 376 if ((reg * 4) < adev->rmmio_size) 377 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 378 else 379 adev->pcie_wreg(adev, (reg * 4), v); 380 } 381 382 /** 383 * amdgpu_device_wreg - write to a register 384 * 385 * @adev: amdgpu_device pointer 386 * @reg: dword aligned register offset 387 * @v: 32 bit value to write to the register 388 * @acc_flags: access flags which require special behavior 389 * 390 * Writes the value specified to the offset specified. 391 */ 392 void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 393 uint32_t acc_flags) 394 { 395 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) 396 return amdgpu_kiq_wreg(adev, reg, v); 397 398 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); 399 } 400 401 /* 402 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range 403 * 404 * this function is invoked only the debugfs register access 405 * */ 406 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 407 uint32_t acc_flags) 408 { 409 if (amdgpu_sriov_fullaccess(adev) && 410 adev->gfx.rlc.funcs && 411 adev->gfx.rlc.funcs->is_rlcg_access_range) { 412 413 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) 414 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); 415 } 416 417 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); 418 } 419 420 /** 421 * amdgpu_io_rreg - read an IO register 422 * 423 * @adev: amdgpu_device pointer 424 * @reg: dword aligned register offset 425 * 426 * Returns the 32 bit value from the offset specified. 
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		/* out-of-aperture offsets go through the MM_INDEX/MM_DATA
		 * indirect window of the IO BAR */
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		/* out-of-aperture offsets go through the MM_INDEX/MM_DATA
		 * indirect window of the IO BAR */
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		/* atomic64 access keeps the 64-bit read indivisible */
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		/* atomic64 access keeps the 64-bit write indivisible */
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	/* the array is consumed as (reg, and_mask, or_mask) triples;
	 * silently ignore a malformed array */
	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		/* a full mask means the register is overwritten outright,
		 * no read-modify-write needed */
		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	/* magic reset value written to PCI config offset 0x7c */
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	/* BAR 2 is the doorbell BAR; bail if it was never assigned */
	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	/* cap the doorbell count at what the BAR can actually hold */
	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should + 1 page (0x400 in dword)
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	/* idempotent: only allocate on the first call */
	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
858 */ 859 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) 860 { 861 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); 862 863 if (offset < adev->wb.num_wb) { 864 __set_bit(offset, adev->wb.used); 865 *wb = offset << 3; /* convert to dw offset */ 866 return 0; 867 } else { 868 return -EINVAL; 869 } 870 } 871 872 /** 873 * amdgpu_device_wb_free - Free a wb entry 874 * 875 * @adev: amdgpu_device pointer 876 * @wb: wb index 877 * 878 * Free a wb slot allocated for use by the driver (all asics) 879 */ 880 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) 881 { 882 wb >>= 3; 883 if (wb < adev->wb.num_wb) 884 __clear_bit(wb, adev->wb.used); 885 } 886 887 /** 888 * amdgpu_device_resize_fb_bar - try to resize FB BAR 889 * 890 * @adev: amdgpu_device pointer 891 * 892 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not 893 * to fail, but if any of the BARs is not accessible after the size we abort 894 * driver loading by returning -ENODEV. 
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	/* the BAR must be a power of two large enough to hold all VRAM */
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	/* re-enable memory decoding with the original command word */
	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	/* the host posts the card for VFs */
	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			/* SMC version lives at dword 69 of the firmware image */
			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* a pending hw reset always requires a re-post; the flag is
	 * consumed (cleared) here */
	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	/* Forward the VGA decode request to the asic-specific handler. */
	amdgpu_asic_set_vga_state(adev, state);
	/*
	 * Tell the VGA arbiter which resources we still decode: legacy
	 * VGA IO/MEM is only claimed while legacy decode is enabled.
	 */
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	/* fewer than 9 bits cannot address a full 4KB page table; fall
	 * back to the driver default (-1 = auto) */
	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value (-1 = auto) */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

/**
 * amdgpu_device_check_smu_prv_buffer_size - validate the SMU memory pool size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the amdgpu_smu_memory_pool_size module parameter (in units of
 * 256MB, i.e. value << 28 bytes) against the installed system memory and
 * stores the resulting pool size in adev->pm.smu_prv_buffer_size.
 * On any validation failure the pool size is forced to 0 (disabled).
 */
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	/* thresholds are 3GB/7GB minus 128MB of slack */
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	/* pool sizes of 256MB/512MB need ~3GB of RAM, 1GB/2GB need ~7GB */
	if ((amdgpu_smu_memory_pool_size == 1) ||
		(amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		(amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("No enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)){
		/* round up rather than reject, keeping the user's intent */
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	/* BOCO platforms power the dGPU off via runtime PM, not switcheroo */
	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		/* bring the PCI function back to D0 before touching it */
		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check of the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

/* vga_switcheroo client callbacks; no reprobe support */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		/* log but keep iterating so every instance is attempted */
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		/* log but keep iterating so every instance is attempted */
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the request hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			/* only the first matching instance is waited on */
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if it the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	/* an IP that isn't present is trivially idle */
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.
This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		/* parameter format: "addr,crtcs;addr,crtcs;..." where addr is
		 * a PCI address or "all"; duplicate so strsep() can mutate it */
		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				/* clamp to [1, 6]; default to 1 crtc if the
				 * count was missing or unparsable */
				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them availale to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	/* asics up to and including VEGA20 (plus default) carry their config
	 * in the driver/vbios and have no gpu_info firmware — return early */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		/* raven2 / picasso / raven share an asic type; distinguish
		 * by revision then PCI device id */
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/* with IP discovery the gfx config comes from the discovery
		 * table; skip straight to the soc bounding box parsing */
		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		/* v1.1 adds scan converter / packer counts */
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	/* select the asic family and register its IP block list */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_get_gfx_info(adev);

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		/* handle vbios stuff prior full access mode for new handshake */
		if (adev->virt.req_init_data_ver == 1) {
			if (!amdgpu_get_bios(adev)) {
				DRM_ERROR("failed to get vbios\n");
				return -EINVAL;
			}

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	/* we need to send REQ_GPU here for legacy handshaker otherwise the vbios
	 * will not be prepared by host for this VF */
	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	/* GFXOFF is incompatible with SR-IOV and with no-HWS scheduling */
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		/* honour the ip_block_mask module parameter */
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					/* -ENOENT means the IP opted out; not fatal */
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* skip vbios handling for new handshake */
			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
				continue;

			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

/*
 * Phase 1 of hw init: bring up only COMMON, IH and (for SR-IOV) PSP
 * blocks, which the remaining IPs depend on.  Blocks already hw-initialized
 * are skipped.
 */
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

/*
 * Phase 2 of hw init: initialize every remaining sw-initialized IP block
 * that phase 1 (and the fw loading step in between) did not handle.
 */
static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

/*
 * Load GPU firmware between hw init phase 1 and phase 2: bring up the PSP
 * block (hw_init on fresh boot, resume after reset/suspend), then load the
 * SMU firmware where applicable.
 */
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	/* PSP-based fw loading only exists on VEGA10 and newer */
	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* no need to do the fw loading again if already done*/
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (adev->in_gpu_reset || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	/* new-handshake VFs acquire full GPU access here; on failure ask the
	 * caller to retry rather than fail hard */
	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
								AMDGPU_GEM_DOMAIN_VRAM,
								AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * retired pages will be loaded from eeprom and reserved here,
	 * it should be called after amdgpu_device_ip_hw_init_phase2 since
	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
	 * for I2C communication which only true at this point.
	 * recovery_init may fail, but it can free all resources allocated by
	 * itself and its failure should not stop amdgpu init process.
	 *
	 * Note: theoretically, this should be called before all vram allocations
	 * to protect retired page from abusing
	 */
	amdgpu_ras_recovery_init(adev);

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	amdgpu_fru_get_product_info(adev);

init_failed:
	/* release the VF access taken above, on both success and failure */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return r;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	/* magic mismatch means VRAM contents did not survive */
	if (memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM))
		return true;

	if (!adev->in_gpu_reset)
		return false;

	/*
	 * For all ASICs with baco/mode1 reset, the VRAM is
	 * always assumed to be lost.
	 */
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_BACO:
	case AMD_RESET_METHOD_MODE1:
		return true;
	default:
		return false;
	}
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * Late initialization pass enabling clockgating for hardware IPs.
 * Fini or suspend, pass disabling clockgating for hardware IPs.
 * Returns 0 on success, negative error code on failure.
 */

static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	/* gate in IP list order, ungate in reverse order */
	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

/*
 * Powergating counterpart of amdgpu_device_set_cg_state(): walks the IP
 * list (forward to gate, reverse to ungate) and runs the
 * set_powergating_state callbacks, skipping the media blocks which manage
 * powergating themselves.
 */
static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
											state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

/*
 * Enable the fan boost feature on every discrete GPU in the system when
 * two or more dGPUs are present.  Stops at the first failure and returns
 * that error code; 0 otherwise.
 */
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled &&
		    adev->powerplay.pp_funcs &&
		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs.
The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_instance;
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	amdgpu_ras_set_error_query_ready(adev, true);

	/* enable clock/power gating now that all blocks are late-initialized */
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	amdgpu_device_fill_reset_magic(adev);

	/* non-fatal: fan boost failure only logs an error */
	r = amdgpu_device_enable_mgpu_fan_boost();
	if (r)
		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);


	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		mutex_lock(&mgpu_info.mutex);

		/*
		 * Reset device p-state to low as this was booted with high.
		 *
		 * This should be performed only after all devices from the same
		 * hive get initialized.
		 *
		 * However, it's unknown how many device in the hive in advance.
		 * As this is counted one by one during devices initializations.
		 *
		 * So, we wait for all XGMI interlinked devices initialized.
		 * This may bring some delays as those devices may come from
		 * different hives. But that should be OK.
		 */
		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
			for (i = 0; i < mgpu_info.num_gpu; i++) {
				gpu_instance = &(mgpu_info.gpu_ins[i]);
				if (gpu_instance->adev->flags & AMD_IS_APU)
					continue;

				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
						AMDGPU_XGMI_PSTATE_MIN);
				if (r) {
					DRM_ERROR("pstate setting failed (%d).\n", r);
					break;
				}
			}
		}

		mutex_unlock(&mgpu_info.mutex);
	}

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_ras_pre_fini(adev);

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_remove_device(adev);

	amdgpu_amdkfd_device_fini(adev);

	/* ungate everything before tearing hardware down */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* need to disable SMC first */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}

	/* hw_fini the remaining blocks in reverse init order */
	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2343 /* XXX handle errors */ 2344 if (r) { 2345 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2346 adev->ip_blocks[i].version->funcs->name, r); 2347 } 2348 2349 adev->ip_blocks[i].status.hw = false; 2350 } 2351 2352 2353 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2354 if (!adev->ip_blocks[i].status.sw) 2355 continue; 2356 2357 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2358 amdgpu_ucode_free_bo(adev); 2359 amdgpu_free_static_csa(&adev->virt.csa_obj); 2360 amdgpu_device_wb_fini(adev); 2361 amdgpu_device_vram_scratch_fini(adev); 2362 amdgpu_ib_pool_fini(adev); 2363 } 2364 2365 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2366 /* XXX handle errors */ 2367 if (r) { 2368 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2369 adev->ip_blocks[i].version->funcs->name, r); 2370 } 2371 adev->ip_blocks[i].status.sw = false; 2372 adev->ip_blocks[i].status.valid = false; 2373 } 2374 2375 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2376 if (!adev->ip_blocks[i].status.late_initialized) 2377 continue; 2378 if (adev->ip_blocks[i].version->funcs->late_fini) 2379 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2380 adev->ip_blocks[i].status.late_initialized = false; 2381 } 2382 2383 amdgpu_ras_fini(adev); 2384 2385 if (amdgpu_sriov_vf(adev)) 2386 if (amdgpu_virt_release_full_gpu(adev, false)) 2387 DRM_ERROR("failed to release exclusive mode on fini\n"); 2388 2389 return 0; 2390 } 2391 2392 /** 2393 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2394 * 2395 * @work: work_struct. 
 */
static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, delayed_init_work.work);
	int r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		DRM_ERROR("ib ring test failed (%d).\n", r);
}

/*
 * amdgpu_device_delay_enable_gfx_off - delayed-work handler that enables GFX
 * powergating via the SMU once no holder remains (gfx_off_req_count == 0)
 * and GFXOFF is not already active.  Serialized by gfx_off_mutex.
 */
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
			adev->gfx.gfx_off_state = true;
	}
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

/**
 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Phase 1 suspends only the display (DCE) blocks; everything else is left
 * for phase 2.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
	int i, r;

	/* ungate everything before suspending */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled separately */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
			/* XXX handle errors */
			r = adev->ip_blocks[i].version->funcs->suspend(adev);
			if (r) {
				DRM_ERROR("suspend of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = false;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
2469 */ 2470 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2471 { 2472 int i, r; 2473 2474 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2475 if (!adev->ip_blocks[i].status.valid) 2476 continue; 2477 /* displays are handled in phase1 */ 2478 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2479 continue; 2480 /* PSP lost connection when err_event_athub occurs */ 2481 if (amdgpu_ras_intr_triggered() && 2482 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2483 adev->ip_blocks[i].status.hw = false; 2484 continue; 2485 } 2486 /* XXX handle errors */ 2487 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2488 /* XXX handle errors */ 2489 if (r) { 2490 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2491 adev->ip_blocks[i].version->funcs->name, r); 2492 } 2493 adev->ip_blocks[i].status.hw = false; 2494 /* handle putting the SMC in the appropriate state */ 2495 if(!amdgpu_sriov_vf(adev)){ 2496 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2497 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 2498 if (r) { 2499 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 2500 adev->mp1_state, r); 2501 return r; 2502 } 2503 } 2504 } 2505 adev->ip_blocks[i].status.hw = false; 2506 } 2507 2508 return 0; 2509 } 2510 2511 /** 2512 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2513 * 2514 * @adev: amdgpu_device pointer 2515 * 2516 * Main suspend function for hardware IPs. The list of all the hardware 2517 * IPs that make up the asic is walked, clockgating is disabled and the 2518 * suspend callbacks are run. suspend puts the hardware and software state 2519 * in each IP into a state suitable for suspend. 2520 * Returns 0 on success, negative error code on failure. 
2521 */ 2522 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2523 { 2524 int r; 2525 2526 if (amdgpu_sriov_vf(adev)) 2527 amdgpu_virt_request_full_gpu(adev, false); 2528 2529 r = amdgpu_device_ip_suspend_phase1(adev); 2530 if (r) 2531 return r; 2532 r = amdgpu_device_ip_suspend_phase2(adev); 2533 2534 if (amdgpu_sriov_vf(adev)) 2535 amdgpu_virt_release_full_gpu(adev, false); 2536 2537 return r; 2538 } 2539 2540 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2541 { 2542 int i, r; 2543 2544 static enum amd_ip_block_type ip_order[] = { 2545 AMD_IP_BLOCK_TYPE_GMC, 2546 AMD_IP_BLOCK_TYPE_COMMON, 2547 AMD_IP_BLOCK_TYPE_PSP, 2548 AMD_IP_BLOCK_TYPE_IH, 2549 }; 2550 2551 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2552 int j; 2553 struct amdgpu_ip_block *block; 2554 2555 for (j = 0; j < adev->num_ip_blocks; j++) { 2556 block = &adev->ip_blocks[j]; 2557 2558 block->status.hw = false; 2559 if (block->version->type != ip_order[i] || 2560 !block->status.valid) 2561 continue; 2562 2563 r = block->version->funcs->hw_init(adev); 2564 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2565 if (r) 2566 return r; 2567 block->status.hw = true; 2568 } 2569 } 2570 2571 return 0; 2572 } 2573 2574 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2575 { 2576 int i, r; 2577 2578 static enum amd_ip_block_type ip_order[] = { 2579 AMD_IP_BLOCK_TYPE_SMC, 2580 AMD_IP_BLOCK_TYPE_DCE, 2581 AMD_IP_BLOCK_TYPE_GFX, 2582 AMD_IP_BLOCK_TYPE_SDMA, 2583 AMD_IP_BLOCK_TYPE_UVD, 2584 AMD_IP_BLOCK_TYPE_VCE, 2585 AMD_IP_BLOCK_TYPE_VCN 2586 }; 2587 2588 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2589 int j; 2590 struct amdgpu_ip_block *block; 2591 2592 for (j = 0; j < adev->num_ip_blocks; j++) { 2593 block = &adev->ip_blocks[j]; 2594 2595 if (block->version->type != ip_order[i] || 2596 !block->status.valid || 2597 block->status.hw) 2598 continue; 2599 2600 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 
				r = block->version->funcs->resume(adev);
			else
				r = block->version->funcs->hw_init(adev);

			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * COMMON, GMC, and IH. resume puts the hardware into a functional state
 * after a suspend and updates the software state as necessary. This
 * function is also used for restoring the GPU after a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {

			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				DRM_ERROR("resume of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, IH and PSP. resume puts the hardware into a
 * functional state after a suspend and updates the software state as
 * necessary. This function is also used for restoring the GPU after a GPU
 * reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;
		r = adev->ip_blocks[i].version->funcs->resume(adev);
		if (r) {
			DRM_ERROR("resume of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs. The hardware IPs
 * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them. In this case (S3/S4) they are
 * run sequentially.
 * Returns 0 on success, negative error code on failure.
2699 */ 2700 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2701 { 2702 int r; 2703 2704 r = amdgpu_device_ip_resume_phase1(adev); 2705 if (r) 2706 return r; 2707 2708 r = amdgpu_device_fw_loading(adev); 2709 if (r) 2710 return r; 2711 2712 r = amdgpu_device_ip_resume_phase2(adev); 2713 2714 return r; 2715 } 2716 2717 /** 2718 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2719 * 2720 * @adev: amdgpu_device pointer 2721 * 2722 * Query the VBIOS data tables to determine if the board supports SR-IOV. 2723 */ 2724 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2725 { 2726 if (amdgpu_sriov_vf(adev)) { 2727 if (adev->is_atom_fw) { 2728 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2729 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2730 } else { 2731 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2732 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2733 } 2734 2735 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2736 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2737 } 2738 } 2739 2740 /** 2741 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2742 * 2743 * @asic_type: AMD asic type 2744 * 2745 * Check if there is DC (new modesetting infrastructre) support for an asic. 2746 * returns true if DC has support, false if not. 2747 */ 2748 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2749 { 2750 switch (asic_type) { 2751 #if defined(CONFIG_DRM_AMD_DC) 2752 case CHIP_BONAIRE: 2753 case CHIP_KAVERI: 2754 case CHIP_KABINI: 2755 case CHIP_MULLINS: 2756 /* 2757 * We have systems in the wild with these ASICs that require 2758 * LVDS and VGA support which is not supported with DC. 2759 * 2760 * Fallback to the non-DC driver here by default so as not to 2761 * cause regressions. 
2762 */ 2763 return amdgpu_dc > 0; 2764 case CHIP_HAWAII: 2765 case CHIP_CARRIZO: 2766 case CHIP_STONEY: 2767 case CHIP_POLARIS10: 2768 case CHIP_POLARIS11: 2769 case CHIP_POLARIS12: 2770 case CHIP_VEGAM: 2771 case CHIP_TONGA: 2772 case CHIP_FIJI: 2773 case CHIP_VEGA10: 2774 case CHIP_VEGA12: 2775 case CHIP_VEGA20: 2776 #if defined(CONFIG_DRM_AMD_DC_DCN) 2777 case CHIP_RAVEN: 2778 case CHIP_NAVI10: 2779 case CHIP_NAVI14: 2780 case CHIP_NAVI12: 2781 case CHIP_RENOIR: 2782 #endif 2783 return amdgpu_dc != 0; 2784 #endif 2785 default: 2786 if (amdgpu_dc > 0) 2787 DRM_INFO("Display Core has been requested via kernel parameter " 2788 "but isn't supported by ASIC, ignoring\n"); 2789 return false; 2790 } 2791 } 2792 2793 /** 2794 * amdgpu_device_has_dc_support - check if dc is supported 2795 * 2796 * @adev: amdgpu_device_pointer 2797 * 2798 * Returns true for supported, false for not supported 2799 */ 2800 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2801 { 2802 if (amdgpu_sriov_vf(adev)) 2803 return false; 2804 2805 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2806 } 2807 2808 2809 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 2810 { 2811 struct amdgpu_device *adev = 2812 container_of(__work, struct amdgpu_device, xgmi_reset_work); 2813 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); 2814 2815 /* It's a bug to not have a hive within this function */ 2816 if (WARN_ON(!hive)) 2817 return; 2818 2819 /* 2820 * Use task barrier to synchronize all xgmi reset works across the 2821 * hive. task_barrier_enter and task_barrier_exit will block 2822 * until all the threads running the xgmi reset works reach 2823 * those points. task_barrier_full will do both blocks. 
2824 */ 2825 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 2826 2827 task_barrier_enter(&hive->tb); 2828 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); 2829 2830 if (adev->asic_reset_res) 2831 goto fail; 2832 2833 task_barrier_exit(&hive->tb); 2834 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); 2835 2836 if (adev->asic_reset_res) 2837 goto fail; 2838 2839 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) 2840 adev->mmhub.funcs->reset_ras_error_count(adev); 2841 } else { 2842 2843 task_barrier_full(&hive->tb); 2844 adev->asic_reset_res = amdgpu_asic_reset(adev); 2845 } 2846 2847 fail: 2848 if (adev->asic_reset_res) 2849 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 2850 adev->asic_reset_res, adev->ddev->unique); 2851 } 2852 2853 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 2854 { 2855 char *input = amdgpu_lockup_timeout; 2856 char *timeout_setting = NULL; 2857 int index = 0; 2858 long timeout; 2859 int ret = 0; 2860 2861 /* 2862 * By default timeout for non compute jobs is 10000. 2863 * And there is no timeout enforced on compute jobs. 2864 * In SR-IOV or passthrough mode, timeout for compute 2865 * jobs are 60000 by default. 
2866 */ 2867 adev->gfx_timeout = msecs_to_jiffies(10000); 2868 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2869 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2870 adev->compute_timeout = msecs_to_jiffies(60000); 2871 else 2872 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; 2873 2874 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2875 while ((timeout_setting = strsep(&input, ",")) && 2876 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2877 ret = kstrtol(timeout_setting, 0, &timeout); 2878 if (ret) 2879 return ret; 2880 2881 if (timeout == 0) { 2882 index++; 2883 continue; 2884 } else if (timeout < 0) { 2885 timeout = MAX_SCHEDULE_TIMEOUT; 2886 } else { 2887 timeout = msecs_to_jiffies(timeout); 2888 } 2889 2890 switch (index++) { 2891 case 0: 2892 adev->gfx_timeout = timeout; 2893 break; 2894 case 1: 2895 adev->compute_timeout = timeout; 2896 break; 2897 case 2: 2898 adev->sdma_timeout = timeout; 2899 break; 2900 case 3: 2901 adev->video_timeout = timeout; 2902 break; 2903 default: 2904 break; 2905 } 2906 } 2907 /* 2908 * There is only one value specified and 2909 * it should apply to all non-compute jobs. 2910 */ 2911 if (index == 1) { 2912 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2913 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2914 adev->compute_timeout = adev->gfx_timeout; 2915 } 2916 } 2917 2918 return ret; 2919 } 2920 2921 static const struct attribute *amdgpu_dev_attributes[] = { 2922 &dev_attr_product_name.attr, 2923 &dev_attr_product_number.attr, 2924 &dev_attr_serial_number.attr, 2925 &dev_attr_pcie_replay_count.attr, 2926 NULL 2927 }; 2928 2929 /** 2930 * amdgpu_device_init - initialize the driver 2931 * 2932 * @adev: amdgpu_device pointer 2933 * @ddev: drm dev pointer 2934 * @pdev: pci dev pointer 2935 * @flags: driver flags 2936 * 2937 * Initializes the driver info and hw (all asics). 2938 * Returns 0 for success or an error on failure. 2939 * Called at driver startup. 
2940 */ 2941 int amdgpu_device_init(struct amdgpu_device *adev, 2942 struct drm_device *ddev, 2943 struct pci_dev *pdev, 2944 uint32_t flags) 2945 { 2946 int r, i; 2947 bool boco = false; 2948 u32 max_MBps; 2949 2950 adev->shutdown = false; 2951 adev->dev = &pdev->dev; 2952 adev->ddev = ddev; 2953 adev->pdev = pdev; 2954 adev->flags = flags; 2955 2956 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 2957 adev->asic_type = amdgpu_force_asic_type; 2958 else 2959 adev->asic_type = flags & AMD_ASIC_MASK; 2960 2961 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2962 if (amdgpu_emu_mode == 1) 2963 adev->usec_timeout *= 10; 2964 adev->gmc.gart_size = 512 * 1024 * 1024; 2965 adev->accel_working = false; 2966 adev->num_rings = 0; 2967 adev->mman.buffer_funcs = NULL; 2968 adev->mman.buffer_funcs_ring = NULL; 2969 adev->vm_manager.vm_pte_funcs = NULL; 2970 adev->vm_manager.vm_pte_num_scheds = 0; 2971 adev->gmc.gmc_funcs = NULL; 2972 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2973 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2974 2975 adev->smc_rreg = &amdgpu_invalid_rreg; 2976 adev->smc_wreg = &amdgpu_invalid_wreg; 2977 adev->pcie_rreg = &amdgpu_invalid_rreg; 2978 adev->pcie_wreg = &amdgpu_invalid_wreg; 2979 adev->pciep_rreg = &amdgpu_invalid_rreg; 2980 adev->pciep_wreg = &amdgpu_invalid_wreg; 2981 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 2982 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 2983 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2984 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2985 adev->didt_rreg = &amdgpu_invalid_rreg; 2986 adev->didt_wreg = &amdgpu_invalid_wreg; 2987 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2988 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2989 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2990 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2991 2992 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2993 
amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2994 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2995 2996 /* mutex initialization are all done here so we 2997 * can recall function without having locking issues */ 2998 atomic_set(&adev->irq.ih.lock, 0); 2999 mutex_init(&adev->firmware.mutex); 3000 mutex_init(&adev->pm.mutex); 3001 mutex_init(&adev->gfx.gpu_clock_mutex); 3002 mutex_init(&adev->srbm_mutex); 3003 mutex_init(&adev->gfx.pipe_reserve_mutex); 3004 mutex_init(&adev->gfx.gfx_off_mutex); 3005 mutex_init(&adev->grbm_idx_mutex); 3006 mutex_init(&adev->mn_lock); 3007 mutex_init(&adev->virt.vf_errors.lock); 3008 hash_init(adev->mn_hash); 3009 mutex_init(&adev->lock_reset); 3010 mutex_init(&adev->psp.mutex); 3011 mutex_init(&adev->notifier_lock); 3012 3013 r = amdgpu_device_check_arguments(adev); 3014 if (r) 3015 return r; 3016 3017 spin_lock_init(&adev->mmio_idx_lock); 3018 spin_lock_init(&adev->smc_idx_lock); 3019 spin_lock_init(&adev->pcie_idx_lock); 3020 spin_lock_init(&adev->uvd_ctx_idx_lock); 3021 spin_lock_init(&adev->didt_idx_lock); 3022 spin_lock_init(&adev->gc_cac_idx_lock); 3023 spin_lock_init(&adev->se_cac_idx_lock); 3024 spin_lock_init(&adev->audio_endpt_idx_lock); 3025 spin_lock_init(&adev->mm_stats.lock); 3026 3027 INIT_LIST_HEAD(&adev->shadow_list); 3028 mutex_init(&adev->shadow_list_lock); 3029 3030 INIT_DELAYED_WORK(&adev->delayed_init_work, 3031 amdgpu_device_delayed_init_work_handler); 3032 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3033 amdgpu_device_delay_enable_gfx_off); 3034 3035 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3036 3037 adev->gfx.gfx_off_req_count = 1; 3038 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3039 3040 /* Registers mapping */ 3041 /* TODO: block userspace mapping of io register */ 3042 if (adev->asic_type >= CHIP_BONAIRE) { 3043 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3044 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 
3045 } else { 3046 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3047 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3048 } 3049 3050 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3051 if (adev->rmmio == NULL) { 3052 return -ENOMEM; 3053 } 3054 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3055 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3056 3057 /* io port mapping */ 3058 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 3059 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 3060 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 3061 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 3062 break; 3063 } 3064 } 3065 if (adev->rio_mem == NULL) 3066 DRM_INFO("PCI I/O BAR is not found.\n"); 3067 3068 /* enable PCIE atomic ops */ 3069 r = pci_enable_atomic_ops_to_root(adev->pdev, 3070 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3071 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3072 if (r) { 3073 adev->have_atomics_support = false; 3074 DRM_INFO("PCIE atomic ops is not supported\n"); 3075 } else { 3076 adev->have_atomics_support = true; 3077 } 3078 3079 amdgpu_device_get_pcie_info(adev); 3080 3081 if (amdgpu_mcbp) 3082 DRM_INFO("MCBP is enabled\n"); 3083 3084 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) 3085 adev->enable_mes = true; 3086 3087 /* detect hw virtualization here */ 3088 amdgpu_detect_virtualization(adev); 3089 3090 r = amdgpu_device_get_job_timeout_settings(adev); 3091 if (r) { 3092 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3093 return r; 3094 } 3095 3096 /* early init functions */ 3097 r = amdgpu_device_ip_early_init(adev); 3098 if (r) 3099 return r; 3100 3101 /* doorbell bar mapping and doorbell index init*/ 3102 amdgpu_device_doorbell_init(adev); 3103 3104 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3105 /* this will fail for cards that aren't VGA class devices, just 3106 * ignore it */ 3107 vga_client_register(adev->pdev, adev, NULL, 
amdgpu_device_vga_set_decode); 3108 3109 if (amdgpu_device_supports_boco(ddev)) 3110 boco = true; 3111 if (amdgpu_has_atpx() && 3112 (amdgpu_is_atpx_hybrid() || 3113 amdgpu_has_atpx_dgpu_power_cntl()) && 3114 !pci_is_thunderbolt_attached(adev->pdev)) 3115 vga_switcheroo_register_client(adev->pdev, 3116 &amdgpu_switcheroo_ops, boco); 3117 if (boco) 3118 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3119 3120 if (amdgpu_emu_mode == 1) { 3121 /* post the asic on emulation mode */ 3122 emu_soc_asic_init(adev); 3123 goto fence_driver_init; 3124 } 3125 3126 /* detect if we are with an SRIOV vbios */ 3127 amdgpu_device_detect_sriov_bios(adev); 3128 3129 /* check if we need to reset the asic 3130 * E.g., driver was not cleanly unloaded previously, etc. 3131 */ 3132 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3133 r = amdgpu_asic_reset(adev); 3134 if (r) { 3135 dev_err(adev->dev, "asic reset on init failed\n"); 3136 goto failed; 3137 } 3138 } 3139 3140 /* Post card if necessary */ 3141 if (amdgpu_device_need_post(adev)) { 3142 if (!adev->bios) { 3143 dev_err(adev->dev, "no vBIOS found\n"); 3144 r = -EINVAL; 3145 goto failed; 3146 } 3147 DRM_INFO("GPU posting now...\n"); 3148 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3149 if (r) { 3150 dev_err(adev->dev, "gpu post error!\n"); 3151 goto failed; 3152 } 3153 } 3154 3155 if (adev->is_atom_fw) { 3156 /* Initialize clocks */ 3157 r = amdgpu_atomfirmware_get_clock_info(adev); 3158 if (r) { 3159 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3160 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3161 goto failed; 3162 } 3163 } else { 3164 /* Initialize clocks */ 3165 r = amdgpu_atombios_get_clock_info(adev); 3166 if (r) { 3167 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3169 goto failed; 3170 } 3171 /* init i2c buses */ 3172 if 
(!amdgpu_device_has_dc_support(adev)) 3173 amdgpu_atombios_i2c_init(adev); 3174 } 3175 3176 fence_driver_init: 3177 /* Fence driver */ 3178 r = amdgpu_fence_driver_init(adev); 3179 if (r) { 3180 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 3181 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3182 goto failed; 3183 } 3184 3185 /* init the mode config */ 3186 drm_mode_config_init(adev->ddev); 3187 3188 r = amdgpu_device_ip_init(adev); 3189 if (r) { 3190 /* failed in exclusive mode due to timeout */ 3191 if (amdgpu_sriov_vf(adev) && 3192 !amdgpu_sriov_runtime(adev) && 3193 amdgpu_virt_mmio_blocked(adev) && 3194 !amdgpu_virt_wait_reset(adev)) { 3195 dev_err(adev->dev, "VF exclusive mode timeout\n"); 3196 /* Don't send request since VF is inactive. */ 3197 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 3198 adev->virt.ops = NULL; 3199 r = -EAGAIN; 3200 goto failed; 3201 } 3202 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3203 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3204 goto failed; 3205 } 3206 3207 dev_info(adev->dev, 3208 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3209 adev->gfx.config.max_shader_engines, 3210 adev->gfx.config.max_sh_per_se, 3211 adev->gfx.config.max_cu_per_sh, 3212 adev->gfx.cu_info.number); 3213 3214 adev->accel_working = true; 3215 3216 amdgpu_vm_check_compute_bug(adev); 3217 3218 /* Initialize the buffer migration limit. */ 3219 if (amdgpu_moverate >= 0) 3220 max_MBps = amdgpu_moverate; 3221 else 3222 max_MBps = 8; /* Allow 8 MB/s. */ 3223 /* Get a log2 for easy divisions. 
*/ 3224 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3225 3226 amdgpu_fbdev_init(adev); 3227 3228 r = amdgpu_pm_sysfs_init(adev); 3229 if (r) { 3230 adev->pm_sysfs_en = false; 3231 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 3232 } else 3233 adev->pm_sysfs_en = true; 3234 3235 r = amdgpu_ucode_sysfs_init(adev); 3236 if (r) { 3237 adev->ucode_sysfs_en = false; 3238 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3239 } else 3240 adev->ucode_sysfs_en = true; 3241 3242 if ((amdgpu_testing & 1)) { 3243 if (adev->accel_working) 3244 amdgpu_test_moves(adev); 3245 else 3246 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 3247 } 3248 if (amdgpu_benchmarking) { 3249 if (adev->accel_working) 3250 amdgpu_benchmark(adev, amdgpu_benchmarking); 3251 else 3252 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 3253 } 3254 3255 /* 3256 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3257 * Otherwise the mgpu fan boost feature will be skipped due to the 3258 * gpu instance is counted less. 3259 */ 3260 amdgpu_register_gpu_instance(adev); 3261 3262 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3263 * explicit gating rather than handling it automatically. 3264 */ 3265 r = amdgpu_device_ip_late_init(adev); 3266 if (r) { 3267 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3268 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3269 goto failed; 3270 } 3271 3272 /* must succeed. 
 */
	amdgpu_ras_resume(adev);

	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
	if (r) {
		dev_err(adev->dev, "Could not create amdgpu device attr\n");
		return r;
	}

	/* NOTE(review): when CONFIG_PERF_EVENTS is disabled, the "if (r)"
	 * below re-tests the sysfs_create_files() result, which is known to
	 * be zero at this point — harmless, but subtle. */
	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		r = amdgpu_pmu_init(adev);
	if (r)
		dev_err(adev->dev, "amdgpu_pmu_init failed\n");

	return 0;

failed:
	amdgpu_vf_error_trans_all(adev);
	if (boco)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	return r;
}

/**
 * amdgpu_device_fini - tear down the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver info (all asics).
 * Called at driver shutdown.
 */
void amdgpu_device_fini(struct amdgpu_device *adev)
{
	int r;

	DRM_INFO("amdgpu: finishing device.\n");
	/* let any pending delayed init finish before tearing down */
	flush_delayed_work(&adev->delayed_init_work);
	adev->shutdown = true;

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 * */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_request_full_gpu(adev, false);

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	if (adev->mode_info.mode_config_initialized){
		if (!amdgpu_device_has_dc_support(adev))
			drm_helper_force_disable_all(adev->ddev);
		else
			drm_atomic_helper_shutdown(adev->ddev);
	}
	amdgpu_fence_driver_fini(adev);
	/* only tear down the sysfs pieces that init actually created */
	if (adev->pm_sysfs_en)
		amdgpu_pm_sysfs_fini(adev);
	amdgpu_fbdev_fini(adev);
	/* NOTE(review): the return value of amdgpu_device_ip_fini() is
	 * collected in r but never checked — confirm this is intentional. */
	r = amdgpu_device_ip_fini(adev);
	if (adev->firmware.gpu_info_fw) {
		release_firmware(adev->firmware.gpu_info_fw);
		adev->firmware.gpu_info_fw = NULL;
	}
	adev->accel_working = false;
	/* free i2c buses */
	if (!amdgpu_device_has_dc_support(adev))
		amdgpu_i2c_fini(adev);

	if (amdgpu_emu_mode != 1)
		amdgpu_atombios_fini(adev);

	kfree(adev->bios);
	adev->bios = NULL;
	if (amdgpu_has_atpx() &&
	    (amdgpu_is_atpx_hybrid() ||
	     amdgpu_has_atpx_dgpu_power_cntl()) &&
	    !pci_is_thunderbolt_attached(adev->pdev))
		vga_switcheroo_unregister_client(adev->pdev);
	if (amdgpu_device_supports_boco(adev->ddev))
		vga_switcheroo_fini_domain_pm_ops(adev->dev);
	/* unregister by passing NULL cookie/callbacks */
	vga_client_register(adev->pdev, NULL, NULL, NULL);
	if (adev->rio_mem)
		pci_iounmap(adev->pdev, adev->rio_mem);
	adev->rio_mem = NULL;
	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	amdgpu_device_doorbell_fini(adev);

	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);

	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_fini(adev);
}


/*
 * Suspend & resume.
 */
/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of suspend
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev;
	struct drm_crtc *crtc;
	struct drm_connector *connector;
	struct drm_connector_list_iter iter;
	int r;

	if (dev == NULL || dev->dev_private == NULL) {
		return -ENODEV;
	}

	adev = dev->dev_private;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	adev->in_suspend = true;
	drm_kms_helper_poll_disable(dev);

	if (fbcon)
		amdgpu_fbdev_set_suspend(adev, 1);

	/* make sure delayed init (IB tests etc.) is not still pending */
	cancel_delayed_work_sync(&adev->delayed_init_work);

	if (!amdgpu_device_has_dc_support(adev)) {
		/* turn off display hw */
		drm_modeset_lock_all(dev);
		drm_connector_list_iter_begin(dev, &iter);
		drm_for_each_connector_iter(connector, &iter)
			drm_helper_connector_dpms(connector,
						  DRM_MODE_DPMS_OFF);
		drm_connector_list_iter_end(&iter);
		drm_modeset_unlock_all(dev);
		/* unpin the front buffers and cursors */
		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
			struct drm_framebuffer *fb = crtc->primary->fb;
			struct amdgpu_bo *robj;

			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
				r = amdgpu_bo_reserve(aobj, true);
				if (r == 0) {
					amdgpu_bo_unpin(aobj);
					amdgpu_bo_unreserve(aobj);
				}
			}

			if (fb == NULL || fb->obj[0] == NULL) {
				continue;
			}
			robj = gem_to_amdgpu_bo(fb->obj[0]);
			/* don't unpin kernel fb objects */
			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
				r = amdgpu_bo_reserve(robj, true);
				if (r == 0) {
					amdgpu_bo_unpin(robj);
					amdgpu_bo_unreserve(robj);
				}
			}
		}
	}

	amdgpu_ras_suspend(adev);

	/* NOTE(review): the return codes of suspend phase1/phase2 below are
	 * stored in r but discarded; the function always returns 0. Confirm
	 * this best-effort behavior is intended. */
	r = amdgpu_device_ip_suspend_phase1(adev);

	amdgpu_amdkfd_suspend(adev, !fbcon);

	/* evict vram memory */
	amdgpu_bo_evict_vram(adev);

	amdgpu_fence_driver_suspend(adev);

	r = amdgpu_device_ip_suspend_phase2(adev);

	/* evict remaining vram memory
	 * This second call to evict vram is to evict the gart page table
	 * using the CPU.
	 */
	amdgpu_bo_evict_vram(adev);

	return 0;
}

/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of resume
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
	struct drm_connector *connector;
	struct drm_connector_list_iter iter;
	struct amdgpu_device *adev = dev->dev_private;
	struct drm_crtc *crtc;
	int r = 0;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
		if (r)
			DRM_ERROR("amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);
	if (r) {
		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
		return r;
	}
	amdgpu_fence_driver_resume(adev);


	r = amdgpu_device_ip_late_init(adev);
	if (r)
		return r;

	/* re-arm delayed init (IB tests etc.) */
	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	if (!amdgpu_device_has_dc_support(adev)) {
		/* pin cursors */
		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
				r = amdgpu_bo_reserve(aobj, true);
				if (r == 0) {
					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
					if (r != 0)
						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
					amdgpu_bo_unreserve(aobj);
				}
			}
		}
	}
	r = amdgpu_amdkfd_resume(adev, !fbcon);
	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	/* blat the mode back in */
	if (fbcon) {
		if (!amdgpu_device_has_dc_support(adev)) {
			/* pre DCE11 */
			drm_helper_resume_force_mode(dev);

			/* turn on display hw */
			drm_modeset_lock_all(dev);

			drm_connector_list_iter_begin(dev, &iter);
			drm_for_each_connector_iter(connector, &iter)
				drm_helper_connector_dpms(connector,
							  DRM_MODE_DPMS_ON);
			drm_connector_list_iter_end(&iter);

			drm_modeset_unlock_all(dev);
		}
		amdgpu_fbdev_set_suspend(adev, 0);
	}

	drm_kms_helper_poll_enable(dev);

	amdgpu_ras_resume(adev);

	/*
	 * Most of the connector probing functions try to acquire runtime pm
	 * refs to ensure that the GPU is powered on when connector polling is
	 * performed. Since we're calling this from a runtime PM callback,
	 * trying to acquire rpm refs will cause us to deadlock.
	 *
	 * Since we're guaranteed to be holding the rpm lock, it's safe to
	 * temporarily disable the rpm helpers so this doesn't deadlock us.
	 */
#ifdef CONFIG_PM
	dev->dev->power.disable_depth++;
#endif
	if (!amdgpu_device_has_dc_support(adev))
		drm_helper_hpd_irq_event(dev);
	else
		drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
	dev->dev->power.disable_depth--;
#endif
	adev->in_suspend = false;

	return 0;
}

/**
 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and
 * the check_soft_reset callbacks are run. check_soft_reset determines
 * if the asic is still hung or not.
 * Returns true if any of the IPs are still in a hung state, false if not.
 */
static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
{
	int i;
	bool asic_hang = false;

	/* VF always reports "hung" so the host-driven full reset is taken */
	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_asic_need_full_reset(adev))
		return true;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
			adev->ip_blocks[i].status.hang =
				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
		if (adev->ip_blocks[i].status.hang) {
			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
			asic_hang = true;
		}
	}
	return asic_hang;
}

/**
 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary for a soft reset to succeed.
 * Returns 0 on success, negative error code on failure.
3636 */ 3637 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 3638 { 3639 int i, r = 0; 3640 3641 for (i = 0; i < adev->num_ip_blocks; i++) { 3642 if (!adev->ip_blocks[i].status.valid) 3643 continue; 3644 if (adev->ip_blocks[i].status.hang && 3645 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 3646 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 3647 if (r) 3648 return r; 3649 } 3650 } 3651 3652 return 0; 3653 } 3654 3655 /** 3656 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3657 * 3658 * @adev: amdgpu_device pointer 3659 * 3660 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3661 * reset is necessary to recover. 3662 * Returns true if a full asic reset is required, false if not. 3663 */ 3664 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3665 { 3666 int i; 3667 3668 if (amdgpu_asic_need_full_reset(adev)) 3669 return true; 3670 3671 for (i = 0; i < adev->num_ip_blocks; i++) { 3672 if (!adev->ip_blocks[i].status.valid) 3673 continue; 3674 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3675 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3676 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3677 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3678 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3679 if (adev->ip_blocks[i].status.hang) { 3680 DRM_INFO("Some block need full reset!\n"); 3681 return true; 3682 } 3683 } 3684 } 3685 return false; 3686 } 3687 3688 /** 3689 * amdgpu_device_ip_soft_reset - do a soft reset 3690 * 3691 * @adev: amdgpu_device pointer 3692 * 3693 * The list of all the hardware IPs that make up the asic is walked and the 3694 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3695 * IP specific hardware or software state changes that are necessary to soft 3696 * reset the IP. 
3697 * Returns 0 on success, negative error code on failure. 3698 */ 3699 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3700 { 3701 int i, r = 0; 3702 3703 for (i = 0; i < adev->num_ip_blocks; i++) { 3704 if (!adev->ip_blocks[i].status.valid) 3705 continue; 3706 if (adev->ip_blocks[i].status.hang && 3707 adev->ip_blocks[i].version->funcs->soft_reset) { 3708 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3709 if (r) 3710 return r; 3711 } 3712 } 3713 3714 return 0; 3715 } 3716 3717 /** 3718 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3719 * 3720 * @adev: amdgpu_device pointer 3721 * 3722 * The list of all the hardware IPs that make up the asic is walked and the 3723 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3724 * handles any IP specific hardware or software state changes that are 3725 * necessary after the IP has been soft reset. 3726 * Returns 0 on success, negative error code on failure. 3727 */ 3728 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3729 { 3730 int i, r = 0; 3731 3732 for (i = 0; i < adev->num_ip_blocks; i++) { 3733 if (!adev->ip_blocks[i].status.valid) 3734 continue; 3735 if (adev->ip_blocks[i].status.hang && 3736 adev->ip_blocks[i].version->funcs->post_soft_reset) 3737 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3738 if (r) 3739 return r; 3740 } 3741 3742 return 0; 3743 } 3744 3745 /** 3746 * amdgpu_device_recover_vram - Recover some VRAM contents 3747 * 3748 * @adev: amdgpu_device pointer 3749 * 3750 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3751 * restore things like GPUVM page tables after a GPU reset where 3752 * the contents of VRAM might be lost. 3753 * 3754 * Returns: 3755 * 0 on success, negative error code on failure. 
3756 */ 3757 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3758 { 3759 struct dma_fence *fence = NULL, *next = NULL; 3760 struct amdgpu_bo *shadow; 3761 long r = 1, tmo; 3762 3763 if (amdgpu_sriov_runtime(adev)) 3764 tmo = msecs_to_jiffies(8000); 3765 else 3766 tmo = msecs_to_jiffies(100); 3767 3768 DRM_INFO("recover vram bo from shadow start\n"); 3769 mutex_lock(&adev->shadow_list_lock); 3770 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3771 3772 /* No need to recover an evicted BO */ 3773 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3774 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || 3775 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3776 continue; 3777 3778 r = amdgpu_bo_restore_shadow(shadow, &next); 3779 if (r) 3780 break; 3781 3782 if (fence) { 3783 tmo = dma_fence_wait_timeout(fence, false, tmo); 3784 dma_fence_put(fence); 3785 fence = next; 3786 if (tmo == 0) { 3787 r = -ETIMEDOUT; 3788 break; 3789 } else if (tmo < 0) { 3790 r = tmo; 3791 break; 3792 } 3793 } else { 3794 fence = next; 3795 } 3796 } 3797 mutex_unlock(&adev->shadow_list_lock); 3798 3799 if (fence) 3800 tmo = dma_fence_wait_timeout(fence, false, tmo); 3801 dma_fence_put(fence); 3802 3803 if (r < 0 || tmo <= 0) { 3804 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 3805 return -EIO; 3806 } 3807 3808 DRM_INFO("recover vram bo from shadow done\n"); 3809 return 0; 3810 } 3811 3812 3813 /** 3814 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3815 * 3816 * @adev: amdgpu device pointer 3817 * @from_hypervisor: request from hypervisor 3818 * 3819 * do VF FLR and reinitialize Asic 3820 * return 0 means succeeded otherwise failed 3821 */ 3822 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 3823 bool from_hypervisor) 3824 { 3825 int r; 3826 3827 if (from_hypervisor) 3828 r = amdgpu_virt_request_full_gpu(adev, true); 3829 else 3830 r = amdgpu_virt_reset_gpu(adev); 3831 if (r) 3832 return r; 3833 3834 
amdgpu_amdkfd_pre_reset(adev); 3835 3836 /* Resume IP prior to SMC */ 3837 r = amdgpu_device_ip_reinit_early_sriov(adev); 3838 if (r) 3839 goto error; 3840 3841 amdgpu_virt_init_data_exchange(adev); 3842 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3843 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3844 3845 r = amdgpu_device_fw_loading(adev); 3846 if (r) 3847 return r; 3848 3849 /* now we are okay to resume SMC/CP/SDMA */ 3850 r = amdgpu_device_ip_reinit_late_sriov(adev); 3851 if (r) 3852 goto error; 3853 3854 amdgpu_irq_gpu_reset_resume_helper(adev); 3855 r = amdgpu_ib_ring_tests(adev); 3856 amdgpu_amdkfd_post_reset(adev); 3857 3858 error: 3859 amdgpu_virt_release_full_gpu(adev, true); 3860 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3861 amdgpu_inc_vram_lost(adev); 3862 r = amdgpu_device_recover_vram(adev); 3863 } 3864 3865 return r; 3866 } 3867 3868 /** 3869 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3870 * 3871 * @adev: amdgpu device pointer 3872 * 3873 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3874 * a hung GPU. 
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	/* amdgpu_gpu_recovery: module param, 0 = off, -1 = auto (per-ASIC) */
	if (amdgpu_gpu_recovery == 0)
		goto disabled;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_gpu_recovery == -1) {
		switch (adev->asic_type) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		case CHIP_TOPAZ:
		case CHIP_TONGA:
		case CHIP_FIJI:
		case CHIP_POLARIS10:
		case CHIP_POLARIS11:
		case CHIP_POLARIS12:
		case CHIP_VEGAM:
		case CHIP_VEGA20:
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_RAVEN:
		case CHIP_ARCTURUS:
		case CHIP_RENOIR:
		case CHIP_NAVI10:
		case CHIP_NAVI14:
		case CHIP_NAVI12:
			break;
		default:
			goto disabled;
		}
	}

	return true;

disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
}


/* Prepare a device for ASIC reset: force-complete HW fences, punish the
 * guilty job, and try a soft reset first on bare metal.  On return,
 * *need_full_reset_arg says whether a full ASIC reset is still required. */
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset = *need_full_reset_arg;

	amdgpu_debugfs_wait_dump(adev);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if(job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			/* soft-reset attempt; fall back to full reset if it
			 * fails or the hang persists */
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}

/* Perform the actual ASIC reset(s) for every device on the list (in
 * parallel for XGMI hives), then re-init IPs and recover VRAM contents.
 * Returns 0 on success, -EAGAIN if the caller should retry with a full
 * reset, or another negative error code. */
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				struct list_head *device_list_handle,
				bool *need_full_reset_arg)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
	int r = 0;

	/*
	 * ASIC reset has to be done on all HGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
					  r, tmp_adev->ddev->unique);
				break;
			}
		}

		/* For XGMI wait for all resets to complete before proceed */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			if (tmp_adev->mmhub.funcs &&
			    tmp_adev->mmhub.funcs->reset_ras_error_count)
				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
		}

		amdgpu_ras_intr_cleared();
	}

	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (need_full_reset) {
			/* post card */
			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
				DRM_WARN("asic atom init failed!");

			if (!r) {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_gtt_mgr_recover(
					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
				if (r)
					goto out;

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					/* NOTE(review): this returns directly
					 * instead of "goto out"/"goto end", so
					 * *need_full_reset_arg is not updated
					 * and remaining hive devices are not
					 * processed — confirm intended. */
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked as reset was already
				 * complete successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				amdgpu_fbdev_set_suspend(tmp_adev, 0);

				/* must succeed. */
				amdgpu_ras_resume(tmp_adev);

				/* Update PSP FW topology after reset */
				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
			}
		}


out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				r = amdgpu_device_ip_suspend(tmp_adev);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	*need_full_reset_arg = need_full_reset;
	return r;
}

/* Take the per-device reset lock (optionally non-blocking) and set the
 * MP1 state expected by the chosen reset method.  Returns false if
 * trylock was requested and the lock is already held. */
static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
	if (trylock) {
		if (!mutex_trylock(&adev->lock_reset))
			return false;
	} else
		mutex_lock(&adev->lock_reset);

	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = true;
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}

	return true;
}

/* Undo amdgpu_device_lock_adev(): flush VF errors, clear MP1/reset state
 * and drop the reset lock. */
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = false;
	mutex_unlock(&adev->lock_reset);
}

/* Re-enable runtime PM for the HDA controller (function 1 on the GPU's
 * PCI slot) after a reset.  Counterpart of
 * amdgpu_device_suspend_display_audio(). */
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
{
	struct pci_dev *p = NULL;

	/* NOTE(review): pci_get_domain_bus_and_slot() returns a referenced
	 * pci_dev, but no pci_dev_put() is done here or in the suspend
	 * counterpart — confirm whether the reference is intentionally kept. */
	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (p) {
		pm_runtime_enable(&(p->dev));
		pm_runtime_resume(&(p->dev));
	}
}

/* Suspend the HDA audio function sharing the GPU's power domain before a
 * BACO/mode1 reset, so the reset does not yank the hardware out from
 * under the audio driver.  Returns 0 once the codec is suspended. */
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue without proper suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	    (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Considering 3S is
		 * the audio controller default autosuspend delay setting.
		 * 4S used here is guaranteed to cover that.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	pm_runtime_disable(&(p->dev));

	return 0;
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job trigger hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle =  NULL;
	bool need_full_reset = false;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool in_ras_intr = amdgpu_ras_intr_triggered();
	bool use_baco =
		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
		true : false;
	bool audio_suspended = false;

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {

		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!\n",
		(in_ras_intr && !use_baco) ? "jobs stop":"reset");

	/*
	 * Here we trylock to avoid chain of resets executing from
	 * either trigger by jobs on different adevs in XGMI hive or jobs on
	 * different schedulers for same device while this TO handler is running.
	 * We always reset all schedulers for device and all devices for XGMI
	 * hive so that should take care of them too.
	 */
	hive = amdgpu_get_xgmi_hive(adev, true);
	if (hive && !mutex_trylock(&hive->reset_lock)) {
		/* another reset on this hive is already in flight; the
		 * hive_lock taken by amdgpu_get_xgmi_hive() is dropped here */
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			  job ? job->base.id : -1, hive->hive_id);
		mutex_unlock(&hive->hive_lock);
		return 0;
	}

	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	INIT_LIST_HEAD(&device_list);
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		/* NOTE(review): this early return does not drop any hive
		 * locks because hive is NULL on this path — verify. */
		if (!hive)
			return -ENODEV;
		if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
			list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
			DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
				  job ? job->base.id : -1);
			mutex_unlock(&hive->hive_lock);
			return 0;
		}

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * Due to the power domain of the graphics device
		 * is shared with AZ power domain. Without this,
		 * we may change the audio hardware from behind
		 * the audio driver's back. That will trigger
		 * some audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		if (!amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_pre_reset(tmp_adev);

		/*
		 * Mark these ASICs to be reseted as untracked first
		 * And add them back after reset completed
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		amdgpu_fbdev_set_suspend(tmp_adev, 1);

		/* disable ras on ALL IPs */
		if (!(in_ras_intr && !use_baco) &&
		      amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (in_ras_intr && !use_baco)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
	}

	/* on a RAS interrupt without BACO we only stop jobs, no HW reset */
	if (in_ras_intr && !use_baco)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/*TODO Should we stop ?*/
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed.*/
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		/* NOTE(review): "r && r == -EAGAIN" — the "r &&" is redundant */
		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point to resubmit jobs if we didn't HW reset*/
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/*unlock kfd: SRIOV would do it separately */
		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
		if (audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);
		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive) {
		mutex_unlock(&hive->reset_lock);
		mutex_unlock(&hive->hive_lock);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
4424 */ 4425 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 4426 { 4427 struct pci_dev *pdev; 4428 enum pci_bus_speed speed_cap, platform_speed_cap; 4429 enum pcie_link_width platform_link_width; 4430 4431 if (amdgpu_pcie_gen_cap) 4432 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 4433 4434 if (amdgpu_pcie_lane_cap) 4435 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 4436 4437 /* covers APUs as well */ 4438 if (pci_is_root_bus(adev->pdev->bus)) { 4439 if (adev->pm.pcie_gen_mask == 0) 4440 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 4441 if (adev->pm.pcie_mlw_mask == 0) 4442 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 4443 return; 4444 } 4445 4446 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 4447 return; 4448 4449 pcie_bandwidth_available(adev->pdev, NULL, 4450 &platform_speed_cap, &platform_link_width); 4451 4452 if (adev->pm.pcie_gen_mask == 0) { 4453 /* asic caps */ 4454 pdev = adev->pdev; 4455 speed_cap = pcie_get_speed_cap(pdev); 4456 if (speed_cap == PCI_SPEED_UNKNOWN) { 4457 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4459 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4460 } else { 4461 if (speed_cap == PCIE_SPEED_16_0GT) 4462 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 4466 else if (speed_cap == PCIE_SPEED_8_0GT) 4467 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 4470 else if (speed_cap == PCIE_SPEED_5_0GT) 4471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 4473 else 4474 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 4475 } 4476 /* platform caps */ 4477 if (platform_speed_cap == 
PCI_SPEED_UNKNOWN) { 4478 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4479 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4480 } else { 4481 if (platform_speed_cap == PCIE_SPEED_16_0GT) 4482 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 4485 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 4486 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 4487 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 4489 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 4490 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 4491 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 4492 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 4493 else 4494 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 4495 4496 } 4497 } 4498 if (adev->pm.pcie_mlw_mask == 0) { 4499 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 4500 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 4501 } else { 4502 switch (platform_link_width) { 4503 case PCIE_LNK_X32: 4504 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 4505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4510 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4511 break; 4512 case PCIE_LNK_X16: 4513 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4519 break; 4520 case PCIE_LNK_X12: 4521 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4526 break; 4527 case PCIE_LNK_X8: 4528 
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 4529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4532 break; 4533 case PCIE_LNK_X4: 4534 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 4535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4537 break; 4538 case PCIE_LNK_X2: 4539 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 4540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 4541 break; 4542 case PCIE_LNK_X1: 4543 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 4544 break; 4545 default: 4546 break; 4547 } 4548 } 4549 } 4550 } 4551 4552 int amdgpu_device_baco_enter(struct drm_device *dev) 4553 { 4554 struct amdgpu_device *adev = dev->dev_private; 4555 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4556 4557 if (!amdgpu_device_supports_baco(adev->ddev)) 4558 return -ENOTSUPP; 4559 4560 if (ras && ras->supported) 4561 adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 4562 4563 return amdgpu_dpm_baco_enter(adev); 4564 } 4565 4566 int amdgpu_device_baco_exit(struct drm_device *dev) 4567 { 4568 struct amdgpu_device *adev = dev->dev_private; 4569 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 4570 int ret = 0; 4571 4572 if (!amdgpu_device_supports_baco(adev->ddev)) 4573 return -ENOTSUPP; 4574 4575 ret = amdgpu_dpm_baco_exit(adev); 4576 if (ret) 4577 return ret; 4578 4579 if (ras && ras->supported) 4580 adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 4581 4582 return 0; 4583 } 4584