//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable them by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for
  // denormals, but this should be checked. Should we issue a warning somewhere
  // if someone tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
  // variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64FP16Denormals(false),
    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    EnableDS128(false),
    DumpCode(false),

    FP64(false),
    FMA(false),
    MIMG_R128(false),
    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    HasIntClamp(false),
    HasVOP3PInsts(false),
    HasMadMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasScalarAtomics(false),
    HasInv2PiInlineImm(false),
    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),
    HasUnpackedD16VMem(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
                                                          const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
                                                       const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
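  // Illustrative example (not from the original comments): a kernel carrying
  //   attributes #0 = { "amdgpu-waves-per-eu"="2,4" }
  // requests between 2 and 4 waves per execution unit; requests outside the
  // subtarget's supported range fall back to Default below.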
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
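  // Illustrative example: with reqd_work_group_size = {64, 1, 1} and Dim == 0,
  // MinSize == MaxSize == 64, so a workitem-id query gets the range [0, 64)
  // and a local-size query gets the single-value range [64, 65).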
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const GCNTargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
    *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
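  // Lane-mask tracking gives the scheduler subregister-granularity liveness
  // and pressure information, so it is only turned on when the SI scheduler
  // is not selected.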
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
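    // Illustrative example: a kernel that preloads 16 user/system SGPRs
    // cannot honor "amdgpu-num-sgpr"="8"; the request is raised to the
    // preloaded count below.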
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In DAG pre-process SUnits are in the original order of
    // the instructions before scheduling.
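    // Illustrative example: for two adjacent DS loads, the first becomes SUa
    // and the barrier edge plus artificial dependencies added below keep any
    // instruction that depends on either from being scheduled between them.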
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}