//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable them by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for denormals, but
  // this should be checked. Should we issue a warning somewhere if someone
  // tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OSes
  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
  // variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for an unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ?
          SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64FP16Denormals(false),
    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    DumpCode(false),

    FP64(false),
    FMA(false),
    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    HasIntClamp(false),
    HasVOP3PInsts(false),
    HasMadMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasInv2PiInlineImm(false),
    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ?
                               Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process the "amdgpu-max-work-group-size" attribute once mesa
  // starts using the "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure the requested values do not violate the subtarget's
  // specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // the "amdgpu-flat-work-group-size" attribute, then set the default
  // minimum/maximum number of waves per execution unit to the values implied
  // by the requested minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process the "amdgpu-max-work-group-size" attribute once mesa
  // starts using the "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure the requested values do not violate the subtarget's
  // specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure the requested values are compatible with the values implied by
  // the requested minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows the value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For an ID query we need to pass the max size
  // as Hi. For a size query we need to pass Hi + 1.
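  // Illustrative example (not from the original source): with
  // !reqd_work_group_size = {64, 1, 1}, a workitem.id.x call would get
  // !range [0, 64) (IdQuery keeps MinSize at 0), while a local_size.x read
  // would get !range [64, 65), i.e. the constant 64, because MaxSize is
  // bumped by one below.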
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const TargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo());

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
    *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top-down and bottom-up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of SGPRs this function can use, given the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if the maximum number of SGPRs was explicitly requested using the
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of VGPRs this function can use, given the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if the maximum number of VGPRs was explicitly requested using the
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In the DAG pre-process, SUnits are in the original order of
    // the instructions before scheduling.
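    // Illustrative example (not from the original source): for two
    // back-to-back DS loads A and B, B gets a barrier edge on A, A inherits
    // B's other predecessors, and A's other successors gain an artificial
    // edge on B, so no unrelated instruction can be scheduled between the
    // pair.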
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}
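// Illustrative usage sketch (not part of the original file): a machine pass
// that wants to reason about register budgets or occupancy could query this
// subtarget as follows, assuming it has a MachineFunction MF for a GCN
// target:
//
//   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
//   unsigned SGPRBudget = ST.getMaxNumSGPRs(MF);
//   unsigned VGPRBudget = ST.getMaxNumVGPRs(MF);
//   unsigned Occupancy  = ST.getOccupancyWithNumVGPRs(VGPRBudget);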