//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for denormals, but
  // this should be checked. Should we issue a warning somewhere if someone
  // tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
  // variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64FP16Denormals(false),
    FPExceptions(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    EnableDS128(false),
    DumpCode(false),

    FP64(false),
    FMA(false),
    MIMG_R128(false),
    IsGCN(false),
    GCN3Encoding(false),
    CIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    HasIntClamp(false),
    HasVOP3PInsts(false),
    HasMadMixInsts(false),
    HasFmaMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasScalarAtomics(false),
    HasInv2PiInlineImm(false),
    HasSDWA(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    HasDLInsts(false),
    D16PreservesUnusedBits(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),
    HasUnpackedD16VMem(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),
    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure requested minimum does not exceed requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure requested minimum does not exceed requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const GCNTargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In DAG pre-process SUnits are in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}