//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  //
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable them by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // Unless +flat-for-global or -flat-for-global is specified explicitly, turn
  // on FlatForGlobal for all OSes on VI and newer hardware to avoid assertion
  // failures due to missing ADDR64 variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // FIXME: I don't think Evergreen has any useful support for denormals, but
  // this should be checked. Should we issue a warning somewhere if someone
  // tries to enable these?
  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    FP64FP16Denormals = false;
    FP32Denormals = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for an unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64FP16Denormals(false),
    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    DumpCode(false),

    FP64(false),
    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    HasIntClamp(false),
    HasVOP3PInsts(false),
    HasMadMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasInv2PiInlineImm(false),
    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
                                                  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
                                                       const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
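  // Clamp the result to the valid occupancy range of [1, MaxWaves].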
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate the subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate the subtarget's specifications.
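  // (The requested minimum must lie within [getMinWavesPerEU(),
  // getMaxWavesPerEU()], and the requested maximum must not exceed
  // getMaxWavesPerEU().)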
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with the values implied by the
  // requested minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows the value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For an ID query we need to pass the max size
  // as Hi. For a size query we need to pass Hi + 1.
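  // For example, with a required work group size of 64 in the queried
  // dimension, a work item ID query gets !range [0, 64) while a local size
  // query gets !range [64, 65).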
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const TargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo());

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
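  // Only track lane masks when the SI Machine Scheduler is not in use.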
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of SGPRs this function can use, using the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if the maximum number of SGPRs was explicitly requested using the
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of VGPRs this function can use, using the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if the maximum number of VGPRs was explicitly requested using the
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // During DAG pre-processing the SUnits are still in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}