1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUSubtarget.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUTargetMachine.h" 18 #include "AMDGPUCallLowering.h" 19 #include "AMDGPUInstructionSelector.h" 20 #include "AMDGPULegalizerInfo.h" 21 #include "AMDGPURegisterBankInfo.h" 22 #include "SIMachineFunctionInfo.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/CodeGen/MachineScheduler.h" 25 #include "llvm/IR/MDBuilder.h" 26 #include "llvm/Target/TargetFrameLowering.h" 27 #include <algorithm> 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "amdgpu-subtarget" 32 33 #define GET_SUBTARGETINFO_TARGET_DESC 34 #define GET_SUBTARGETINFO_CTOR 35 #include "AMDGPUGenSubtargetInfo.inc" 36 37 AMDGPUSubtarget::~AMDGPUSubtarget() = default; 38 39 AMDGPUSubtarget & 40 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, 41 StringRef GPU, StringRef FS) { 42 // Determine default and user-specified characteristics 43 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be 44 // enabled, but some instructions do not respect them and they run at the 45 // double precision rate, so don't enable by default. 46 // 47 // We want to be able to turn these off, but making this a subtarget feature 48 // for SI has the unhelpful behavior that it unsets everything else if you 49 // disable it. 50 51 SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,"); 52 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 53 FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; 54 55 FullFS += FS; 56 57 ParseSubtargetFeatures(GPU, FullFS); 58 59 // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es 60 // on VI and newer hardware to avoid assertion failures due to missing ADDR64 61 // variants of MUBUF instructions. 62 if (!hasAddr64() && !FS.contains("flat-for-global")) { 63 FlatForGlobal = true; 64 } 65 66 // FIXME: I don't think think Evergreen has any useful support for 67 // denormals, but should be checked. Should we issue a warning somewhere 68 // if someone tries to enable these? 69 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 70 FP64FP16Denormals = false; 71 FP32Denormals = false; 72 } 73 74 // Set defaults if needed. 75 if (MaxPrivateElementSize == 0) 76 MaxPrivateElementSize = 4; 77 78 if (LDSBankCount == 0) 79 LDSBankCount = 32; 80 81 if (TT.getArch() == Triple::amdgcn) { 82 if (LocalMemorySize == 0) 83 LocalMemorySize = 32768; 84 85 // Do something sensible for unspecified target. 86 if (!HasMovrel && !HasVGPRIndexMode) 87 HasMovrel = true; 88 } 89 90 return *this; 91 } 92 93 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 94 const TargetMachine &TM) 95 : AMDGPUGenSubtargetInfo(TT, GPU, FS), 96 TargetTriple(TT), 97 Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), 98 IsaVersion(ISAVersion0_0_0), 99 WavefrontSize(0), 100 LocalMemorySize(0), 101 LDSBankCount(0), 102 MaxPrivateElementSize(0), 103 104 FastFMAF32(false), 105 HalfRate64Ops(false), 106 107 FP32Denormals(false), 108 FP64FP16Denormals(false), 109 FPExceptions(false), 110 DX10Clamp(false), 111 FlatForGlobal(false), 112 AutoWaitcntBeforeBarrier(false), 113 CodeObjectV3(false), 114 UnalignedScratchAccess(false), 115 UnalignedBufferAccess(false), 116 117 HasApertureRegs(false), 118 EnableXNACK(false), 119 TrapHandler(false), 120 DebuggerInsertNops(false), 121 DebuggerReserveRegs(false), 122 DebuggerEmitPrologue(false), 123 124 EnableVGPRSpilling(false), 125 EnablePromoteAlloca(false), 126 EnableLoadStoreOpt(false), 127 EnableUnsafeDSOffsetFolding(false), 128 EnableSIScheduler(false), 129 DumpCode(false), 130 131 FP64(false), 132 IsGCN(false), 133 GCN3Encoding(false), 134 CIInsts(false), 135 GFX9Insts(false), 136 SGPRInitBug(false), 137 HasSMemRealTime(false), 138 Has16BitInsts(false), 139 HasIntClamp(false), 140 HasVOP3PInsts(false), 141 HasMadMixInsts(false), 142 HasMovrel(false), 143 HasVGPRIndexMode(false), 144 HasScalarStores(false), 145 HasInv2PiInlineImm(false), 146 HasSDWA(false), 147 HasSDWAOmod(false), 148 HasSDWAScalar(false), 149 HasSDWASdst(false), 150 HasSDWAMac(false), 151 HasSDWAOutModsVOPC(false), 152 HasDPP(false), 153 FlatAddressSpace(false), 154 FlatInstOffsets(false), 155 FlatGlobalInsts(false), 156 FlatScratchInsts(false), 157 AddNoCarryInsts(false), 158 159 R600ALUInst(false), 160 CaymanISA(false), 161 CFALUBug(false), 162 HasVertexCache(false), 163 TexVTXClauseSize(0), 164 ScalarizeGlobal(false), 165 166 FeatureDisable(false), 167 InstrItins(getInstrItineraryForCPU(GPU)) { 168 AS = AMDGPU::getAMDGPUAS(TT); 169 initializeSubtargetDependencies(TT, GPU, FS); 170 } 171 172 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, 173 const Function &F) const { 174 if (NWaves == 1) 175 return getLocalMemorySize(); 176 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; 177 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); 178 unsigned MaxWaves = getMaxWavesPerEU(); 179 return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves; 180 } 181 182 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, 183 const Function &F) const { 184 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; 185 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); 186 unsigned MaxWaves = getMaxWavesPerEU(); 187 unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu; 188 unsigned NumWaves = Limit / (Bytes ? Bytes : 1u); 189 NumWaves = std::min(NumWaves, MaxWaves); 190 NumWaves = std::max(NumWaves, 1u); 191 return NumWaves; 192 } 193 194 std::pair<unsigned, unsigned> 195 AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { 196 switch (CC) { 197 case CallingConv::AMDGPU_CS: 198 case CallingConv::AMDGPU_KERNEL: 199 case CallingConv::SPIR_KERNEL: 200 return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4); 201 case CallingConv::AMDGPU_VS: 202 case CallingConv::AMDGPU_LS: 203 case CallingConv::AMDGPU_HS: 204 case CallingConv::AMDGPU_ES: 205 case CallingConv::AMDGPU_GS: 206 case CallingConv::AMDGPU_PS: 207 return std::make_pair(1, getWavefrontSize()); 208 default: 209 return std::make_pair(1, 16 * getWavefrontSize()); 210 } 211 } 212 213 std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes( 214 const Function &F) const { 215 // FIXME: 1024 if function. 216 // Default minimum/maximum flat work group sizes. 217 std::pair<unsigned, unsigned> Default = 218 getDefaultFlatWorkGroupSize(F.getCallingConv()); 219 220 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa 221 // starts using "amdgpu-flat-work-group-size" attribute. 222 Default.second = AMDGPU::getIntegerAttribute( 223 F, "amdgpu-max-work-group-size", Default.second); 224 Default.first = std::min(Default.first, Default.second); 225 226 // Requested minimum/maximum flat work group sizes. 227 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute( 228 F, "amdgpu-flat-work-group-size", Default); 229 230 // Make sure requested minimum is less than requested maximum. 231 if (Requested.first > Requested.second) 232 return Default; 233 234 // Make sure requested values do not violate subtarget's specifications. 235 if (Requested.first < getMinFlatWorkGroupSize()) 236 return Default; 237 if (Requested.second > getMaxFlatWorkGroupSize()) 238 return Default; 239 240 return Requested; 241 } 242 243 std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU( 244 const Function &F) const { 245 // Default minimum/maximum number of waves per execution unit. 246 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU()); 247 248 // Default/requested minimum/maximum flat work group sizes. 249 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 250 251 // If minimum/maximum flat work group sizes were explicitly requested using 252 // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum 253 // number of waves per execution unit to values implied by requested 254 // minimum/maximum flat work group sizes. 255 unsigned MinImpliedByFlatWorkGroupSize = 256 getMaxWavesPerEU(FlatWorkGroupSizes.second); 257 bool RequestedFlatWorkGroupSize = false; 258 259 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa 260 // starts using "amdgpu-flat-work-group-size" attribute. 261 if (F.hasFnAttribute("amdgpu-max-work-group-size") || 262 F.hasFnAttribute("amdgpu-flat-work-group-size")) { 263 Default.first = MinImpliedByFlatWorkGroupSize; 264 RequestedFlatWorkGroupSize = true; 265 } 266 267 // Requested minimum/maximum number of waves per execution unit. 268 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute( 269 F, "amdgpu-waves-per-eu", Default, true); 270 271 // Make sure requested minimum is less than requested maximum. 272 if (Requested.second && Requested.first > Requested.second) 273 return Default; 274 275 // Make sure requested values do not violate subtarget's specifications. 276 if (Requested.first < getMinWavesPerEU() || 277 Requested.first > getMaxWavesPerEU()) 278 return Default; 279 if (Requested.second > getMaxWavesPerEU()) 280 return Default; 281 282 // Make sure requested values are compatible with values implied by requested 283 // minimum/maximum flat work group sizes. 284 if (RequestedFlatWorkGroupSize && 285 Requested.first < MinImpliedByFlatWorkGroupSize) 286 return Default; 287 288 return Requested; 289 } 290 291 bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { 292 Function *Kernel = I->getParent()->getParent(); 293 unsigned MinSize = 0; 294 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; 295 bool IdQuery = false; 296 297 // If reqd_work_group_size is present it narrows value down. 298 if (auto *CI = dyn_cast<CallInst>(I)) { 299 const Function *F = CI->getCalledFunction(); 300 if (F) { 301 unsigned Dim = UINT_MAX; 302 switch (F->getIntrinsicID()) { 303 case Intrinsic::amdgcn_workitem_id_x: 304 case Intrinsic::r600_read_tidig_x: 305 IdQuery = true; 306 LLVM_FALLTHROUGH; 307 case Intrinsic::r600_read_local_size_x: 308 Dim = 0; 309 break; 310 case Intrinsic::amdgcn_workitem_id_y: 311 case Intrinsic::r600_read_tidig_y: 312 IdQuery = true; 313 LLVM_FALLTHROUGH; 314 case Intrinsic::r600_read_local_size_y: 315 Dim = 1; 316 break; 317 case Intrinsic::amdgcn_workitem_id_z: 318 case Intrinsic::r600_read_tidig_z: 319 IdQuery = true; 320 LLVM_FALLTHROUGH; 321 case Intrinsic::r600_read_local_size_z: 322 Dim = 2; 323 break; 324 default: 325 break; 326 } 327 if (Dim <= 3) { 328 if (auto Node = Kernel->getMetadata("reqd_work_group_size")) 329 if (Node->getNumOperands() == 3) 330 MinSize = MaxSize = mdconst::extract<ConstantInt>( 331 Node->getOperand(Dim))->getZExtValue(); 332 } 333 } 334 } 335 336 if (!MaxSize) 337 return false; 338 339 // Range metadata is [Lo, Hi). For ID query we need to pass max size 340 // as Hi. For size query we need to pass Hi + 1. 341 if (IdQuery) 342 MinSize = 0; 343 else 344 ++MaxSize; 345 346 MDBuilder MDB(I->getContext()); 347 MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize), 348 APInt(32, MaxSize)); 349 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); 350 return true; 351 } 352 353 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, 354 const TargetMachine &TM) : 355 AMDGPUSubtarget(TT, GPU, FS, TM), 356 InstrInfo(*this), 357 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 358 TLInfo(TM, *this) {} 359 360 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, 361 const TargetMachine &TM) 362 : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), 363 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 364 TLInfo(TM, *this) { 365 CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); 366 Legalizer.reset(new AMDGPULegalizerInfo()); 367 368 RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); 369 InstSelector.reset(new AMDGPUInstructionSelector( 370 *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()))); 371 } 372 373 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 374 unsigned NumRegionInstrs) const { 375 // Track register pressure so the scheduler can try to decrease 376 // pressure once register usage is above the threshold defined by 377 // SIRegisterInfo::getRegPressureSetLimit() 378 Policy.ShouldTrackPressure = true; 379 380 // Enabling both top down and bottom up scheduling seems to give us less 381 // register spills than just using one of these approaches on its own. 382 Policy.OnlyTopDown = false; 383 Policy.OnlyBottomUp = false; 384 385 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 386 if (!enableSIScheduler()) 387 Policy.ShouldTrackLaneMasks = true; 388 } 389 390 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { 391 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); 392 } 393 394 unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, 395 unsigned ExplicitArgBytes) const { 396 unsigned ImplicitBytes = getImplicitArgNumBytes(MF); 397 if (ImplicitBytes == 0) 398 return ExplicitArgBytes; 399 400 unsigned Alignment = getAlignmentForImplicitArgPtr(); 401 return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; 402 } 403 404 unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { 405 if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 406 if (SGPRs <= 80) 407 return 10; 408 if (SGPRs <= 88) 409 return 9; 410 if (SGPRs <= 100) 411 return 8; 412 return 7; 413 } 414 if (SGPRs <= 48) 415 return 10; 416 if (SGPRs <= 56) 417 return 9; 418 if (SGPRs <= 64) 419 return 8; 420 if (SGPRs <= 72) 421 return 7; 422 if (SGPRs <= 80) 423 return 6; 424 return 5; 425 } 426 427 unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { 428 if (VGPRs <= 24) 429 return 10; 430 if (VGPRs <= 28) 431 return 9; 432 if (VGPRs <= 32) 433 return 8; 434 if (VGPRs <= 36) 435 return 7; 436 if (VGPRs <= 40) 437 return 6; 438 if (VGPRs <= 48) 439 return 5; 440 if (VGPRs <= 64) 441 return 4; 442 if (VGPRs <= 84) 443 return 3; 444 if (VGPRs <= 128) 445 return 2; 446 return 1; 447 } 448 449 unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { 450 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 451 if (MFI.hasFlatScratchInit()) { 452 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 453 return 6; // FLAT_SCRATCH, XNACK, VCC (in that order). 454 if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) 455 return 4; // FLAT_SCRATCH, VCC (in that order). 456 } 457 458 if (isXNACKEnabled()) 459 return 4; // XNACK, VCC (in that order). 460 return 2; // VCC. 461 } 462 463 unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { 464 const Function &F = *MF.getFunction(); 465 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 466 467 // Compute maximum number of SGPRs function can use using default/requested 468 // minimum number of waves per execution unit. 469 std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU(); 470 unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false); 471 unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true); 472 473 // Check if maximum number of SGPRs was explicitly requested using 474 // "amdgpu-num-sgpr" attribute. 475 if (F.hasFnAttribute("amdgpu-num-sgpr")) { 476 unsigned Requested = AMDGPU::getIntegerAttribute( 477 F, "amdgpu-num-sgpr", MaxNumSGPRs); 478 479 // Make sure requested value does not violate subtarget's specifications. 480 if (Requested && (Requested <= getReservedNumSGPRs(MF))) 481 Requested = 0; 482 483 // If more SGPRs are required to support the input user/system SGPRs, 484 // increase to accommodate them. 485 // 486 // FIXME: This really ends up using the requested number of SGPRs + number 487 // of reserved special registers in total. Theoretically you could re-use 488 // the last input registers for these special registers, but this would 489 // require a lot of complexity to deal with the weird aliasing. 490 unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs(); 491 if (Requested && Requested < InputNumSGPRs) 492 Requested = InputNumSGPRs; 493 494 // Make sure requested value is compatible with values implied by 495 // default/requested minimum/maximum number of waves per execution unit. 496 if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false)) 497 Requested = 0; 498 if (WavesPerEU.second && 499 Requested && Requested < getMinNumSGPRs(WavesPerEU.second)) 500 Requested = 0; 501 502 if (Requested) 503 MaxNumSGPRs = Requested; 504 } 505 506 if (hasSGPRInitBug()) 507 MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 508 509 return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF), 510 MaxAddressableNumSGPRs); 511 } 512 513 unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { 514 const Function &F = *MF.getFunction(); 515 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 516 517 // Compute maximum number of VGPRs function can use using default/requested 518 // minimum number of waves per execution unit. 519 std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU(); 520 unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first); 521 522 // Check if maximum number of VGPRs was explicitly requested using 523 // "amdgpu-num-vgpr" attribute. 524 if (F.hasFnAttribute("amdgpu-num-vgpr")) { 525 unsigned Requested = AMDGPU::getIntegerAttribute( 526 F, "amdgpu-num-vgpr", MaxNumVGPRs); 527 528 // Make sure requested value does not violate subtarget's specifications. 529 if (Requested && Requested <= getReservedNumVGPRs(MF)) 530 Requested = 0; 531 532 // Make sure requested value is compatible with values implied by 533 // default/requested minimum/maximum number of waves per execution unit. 534 if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) 535 Requested = 0; 536 if (WavesPerEU.second && 537 Requested && Requested < getMinNumVGPRs(WavesPerEU.second)) 538 Requested = 0; 539 540 if (Requested) 541 MaxNumVGPRs = Requested; 542 } 543 544 return MaxNumVGPRs - getReservedNumVGPRs(MF); 545 } 546 547 namespace { 548 struct MemOpClusterMutation : ScheduleDAGMutation { 549 const SIInstrInfo *TII; 550 551 MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {} 552 553 void apply(ScheduleDAGInstrs *DAGInstrs) override { 554 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); 555 556 SUnit *SUa = nullptr; 557 // Search for two consequent memory operations and link them 558 // to prevent scheduler from moving them apart. 559 // In DAG pre-process SUnits are in the original order of 560 // the instructions before scheduling. 561 for (SUnit &SU : DAG->SUnits) { 562 MachineInstr &MI2 = *SU.getInstr(); 563 if (!MI2.mayLoad() && !MI2.mayStore()) { 564 SUa = nullptr; 565 continue; 566 } 567 if (!SUa) { 568 SUa = &SU; 569 continue; 570 } 571 572 MachineInstr &MI1 = *SUa->getInstr(); 573 if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) || 574 (TII->isFLAT(MI1) && TII->isFLAT(MI2)) || 575 (TII->isSMRD(MI1) && TII->isSMRD(MI2)) || 576 (TII->isDS(MI1) && TII->isDS(MI2))) { 577 SU.addPredBarrier(SUa); 578 579 for (const SDep &SI : SU.Preds) { 580 if (SI.getSUnit() != SUa) 581 SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial)); 582 } 583 584 if (&SU != &DAG->ExitSU) { 585 for (const SDep &SI : SUa->Succs) { 586 if (SI.getSUnit() != &SU) 587 SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial)); 588 } 589 } 590 } 591 592 SUa = &SU; 593 } 594 } 595 }; 596 } // namespace 597 598 void SISubtarget::getPostRAMutations( 599 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { 600 Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo)); 601 } 602