//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

/// Build the final feature string from built-in defaults, OS-dependent
/// features and the user-supplied string \p FS (in that order, so user
/// features win), parse it, and then fix up unspecified or inconsistent
/// feature bits.  Returns *this so the call can be chained from a
/// constructor initializer list.
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for
  // denormals, but should be checked. Should we issue a warning somewhere
  // if someone tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  // Append the user features last so they override all of the defaults above.
  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
  // variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

/// Every feature bit starts out false/zero here; the real values come from
/// initializeSubtargetDependencies() -> ParseSubtargetFeatures() below.
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    // amdgcn triples are at least SOUTHERN_ISLANDS; everything else is the
    // R600 family.  The precise generation is refined by feature parsing.
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64FP16Denormals(false),
    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    DumpCode(false),

    FP64(false),
    FMA(false),
    MIMG_R128(false),
    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    HasIntClamp(false),
    HasVOP3PInsts(false),
    HasMadMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasScalarAtomics(false),
    HasInv2PiInlineImm(false),
    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),
    HasUnpackedD16VMem(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned
AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, 183 const Function &F) const { 184 if (NWaves == 1) 185 return getLocalMemorySize(); 186 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; 187 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); 188 unsigned MaxWaves = getMaxWavesPerEU(); 189 return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves; 190 } 191 192 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, 193 const Function &F) const { 194 unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; 195 unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); 196 unsigned MaxWaves = getMaxWavesPerEU(); 197 unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu; 198 unsigned NumWaves = Limit / (Bytes ? Bytes : 1u); 199 NumWaves = std::min(NumWaves, MaxWaves); 200 NumWaves = std::max(NumWaves, 1u); 201 return NumWaves; 202 } 203 204 std::pair<unsigned, unsigned> 205 AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { 206 switch (CC) { 207 case CallingConv::AMDGPU_CS: 208 case CallingConv::AMDGPU_KERNEL: 209 case CallingConv::SPIR_KERNEL: 210 return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4); 211 case CallingConv::AMDGPU_VS: 212 case CallingConv::AMDGPU_LS: 213 case CallingConv::AMDGPU_HS: 214 case CallingConv::AMDGPU_ES: 215 case CallingConv::AMDGPU_GS: 216 case CallingConv::AMDGPU_PS: 217 return std::make_pair(1, getWavefrontSize()); 218 default: 219 return std::make_pair(1, 16 * getWavefrontSize()); 220 } 221 } 222 223 std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes( 224 const Function &F) const { 225 // FIXME: 1024 if function. 226 // Default minimum/maximum flat work group sizes. 227 std::pair<unsigned, unsigned> Default = 228 getDefaultFlatWorkGroupSize(F.getCallingConv()); 229 230 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa 231 // starts using "amdgpu-flat-work-group-size" attribute. 
232 Default.second = AMDGPU::getIntegerAttribute( 233 F, "amdgpu-max-work-group-size", Default.second); 234 Default.first = std::min(Default.first, Default.second); 235 236 // Requested minimum/maximum flat work group sizes. 237 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute( 238 F, "amdgpu-flat-work-group-size", Default); 239 240 // Make sure requested minimum is less than requested maximum. 241 if (Requested.first > Requested.second) 242 return Default; 243 244 // Make sure requested values do not violate subtarget's specifications. 245 if (Requested.first < getMinFlatWorkGroupSize()) 246 return Default; 247 if (Requested.second > getMaxFlatWorkGroupSize()) 248 return Default; 249 250 return Requested; 251 } 252 253 std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU( 254 const Function &F) const { 255 // Default minimum/maximum number of waves per execution unit. 256 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU()); 257 258 // Default/requested minimum/maximum flat work group sizes. 259 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F); 260 261 // If minimum/maximum flat work group sizes were explicitly requested using 262 // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum 263 // number of waves per execution unit to values implied by requested 264 // minimum/maximum flat work group sizes. 265 unsigned MinImpliedByFlatWorkGroupSize = 266 getMaxWavesPerEU(FlatWorkGroupSizes.second); 267 bool RequestedFlatWorkGroupSize = false; 268 269 // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa 270 // starts using "amdgpu-flat-work-group-size" attribute. 271 if (F.hasFnAttribute("amdgpu-max-work-group-size") || 272 F.hasFnAttribute("amdgpu-flat-work-group-size")) { 273 Default.first = MinImpliedByFlatWorkGroupSize; 274 RequestedFlatWorkGroupSize = true; 275 } 276 277 // Requested minimum/maximum number of waves per execution unit. 
278 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute( 279 F, "amdgpu-waves-per-eu", Default, true); 280 281 // Make sure requested minimum is less than requested maximum. 282 if (Requested.second && Requested.first > Requested.second) 283 return Default; 284 285 // Make sure requested values do not violate subtarget's specifications. 286 if (Requested.first < getMinWavesPerEU() || 287 Requested.first > getMaxWavesPerEU()) 288 return Default; 289 if (Requested.second > getMaxWavesPerEU()) 290 return Default; 291 292 // Make sure requested values are compatible with values implied by requested 293 // minimum/maximum flat work group sizes. 294 if (RequestedFlatWorkGroupSize && 295 Requested.first < MinImpliedByFlatWorkGroupSize) 296 return Default; 297 298 return Requested; 299 } 300 301 bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { 302 Function *Kernel = I->getParent()->getParent(); 303 unsigned MinSize = 0; 304 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; 305 bool IdQuery = false; 306 307 // If reqd_work_group_size is present it narrows value down. 
308 if (auto *CI = dyn_cast<CallInst>(I)) { 309 const Function *F = CI->getCalledFunction(); 310 if (F) { 311 unsigned Dim = UINT_MAX; 312 switch (F->getIntrinsicID()) { 313 case Intrinsic::amdgcn_workitem_id_x: 314 case Intrinsic::r600_read_tidig_x: 315 IdQuery = true; 316 LLVM_FALLTHROUGH; 317 case Intrinsic::r600_read_local_size_x: 318 Dim = 0; 319 break; 320 case Intrinsic::amdgcn_workitem_id_y: 321 case Intrinsic::r600_read_tidig_y: 322 IdQuery = true; 323 LLVM_FALLTHROUGH; 324 case Intrinsic::r600_read_local_size_y: 325 Dim = 1; 326 break; 327 case Intrinsic::amdgcn_workitem_id_z: 328 case Intrinsic::r600_read_tidig_z: 329 IdQuery = true; 330 LLVM_FALLTHROUGH; 331 case Intrinsic::r600_read_local_size_z: 332 Dim = 2; 333 break; 334 default: 335 break; 336 } 337 if (Dim <= 3) { 338 if (auto Node = Kernel->getMetadata("reqd_work_group_size")) 339 if (Node->getNumOperands() == 3) 340 MinSize = MaxSize = mdconst::extract<ConstantInt>( 341 Node->getOperand(Dim))->getZExtValue(); 342 } 343 } 344 } 345 346 if (!MaxSize) 347 return false; 348 349 // Range metadata is [Lo, Hi). For ID query we need to pass max size 350 // as Hi. For size query we need to pass Hi + 1. 
351 if (IdQuery) 352 MinSize = 0; 353 else 354 ++MaxSize; 355 356 MDBuilder MDB(I->getContext()); 357 MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize), 358 APInt(32, MaxSize)); 359 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); 360 return true; 361 } 362 363 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, 364 const TargetMachine &TM) : 365 AMDGPUSubtarget(TT, GPU, FS, TM), 366 InstrInfo(*this), 367 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 368 TLInfo(TM, *this) {} 369 370 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, 371 const GCNTargetMachine &TM) 372 : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), 373 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), 374 TLInfo(TM, *this) { 375 CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); 376 Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); 377 378 RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); 379 InstSelector.reset(new AMDGPUInstructionSelector( 380 *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()))); 381 } 382 383 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 384 unsigned NumRegionInstrs) const { 385 // Track register pressure so the scheduler can try to decrease 386 // pressure once register usage is above the threshold defined by 387 // SIRegisterInfo::getRegPressureSetLimit() 388 Policy.ShouldTrackPressure = true; 389 390 // Enabling both top down and bottom up scheduling seems to give us less 391 // register spills than just using one of these approaches on its own. 392 Policy.OnlyTopDown = false; 393 Policy.OnlyBottomUp = false; 394 395 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler. 
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

/// VGPR spilling is unconditionally available to compute (non-shader)
/// functions; shaders only get it when EnableVGPRSpilling is set.
bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

/// Returns the total kernarg segment size: the explicit argument bytes,
/// padded to the implicit-argument-pointer alignment, plus the implicit
/// argument bytes (if the target/OS uses any).
unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

/// Hardware occupancy (waves per EU) as a function of SGPRs used.
/// The break points differ between VI+ and earlier generations.
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

/// Hardware occupancy (waves per EU) as a function of VGPRs used.
unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

/// Number of SGPRs reserved for special registers (VCC, and depending on
/// generation/features, FLAT_SCRATCH and XNACK) that are unavailable to the
/// register allocator.
unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

/// Returns the maximum number of allocatable SGPRs for \p MF, derived from
/// the waves-per-EU request, optionally overridden by the "amdgpu-num-sgpr"
/// attribute (ignored when it would violate subtarget/wave constraints),
/// capped for the SGPR-init hardware bug, and reduced by the reserved SGPRs.
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    // Requested == 0 means "invalid; fall back to the computed maximum".
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

/// Returns the maximum number of allocatable VGPRs for \p MF, derived from
/// the waves-per-EU request, optionally overridden by the "amdgpu-num-vgpr"
/// attribute (ignored when it would violate subtarget/wave constraints),
/// and reduced by the reserved VGPRs.
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    // Requested == 0 means "invalid; fall back to the computed maximum".
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
/// DAG mutation that adds artificial edges between consecutive memory
/// operations of the same kind (VMEM/FLAT/SMRD/DS) so the scheduler keeps
/// them adjacent, preserving clustering opportunities.
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent scheduler from moving them apart.
    // In DAG pre-process SUnits are in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      // A non-memory instruction breaks the current run of memory ops.
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        // Redirect the rest of SU's predecessors to also precede SUa, so
        // nothing can be scheduled between the pair.
        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

/// Registers the post-RA DAG mutations used by the SI scheduler.
void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}