148ebc1afSAustin Kerbow //===--- AMDGPUIGroupLP.cpp - AMDGPU IGroupLP  ------------===//
248ebc1afSAustin Kerbow //
348ebc1afSAustin Kerbow // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
448ebc1afSAustin Kerbow // See https://llvm.org/LICENSE.txt for license information.
548ebc1afSAustin Kerbow // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
648ebc1afSAustin Kerbow //
748ebc1afSAustin Kerbow //===----------------------------------------------------------------------===//
848ebc1afSAustin Kerbow //
948ebc1afSAustin Kerbow // \file This file defines a set of schedule DAG mutations that can be used to
1048ebc1afSAustin Kerbow // override default scheduler behavior to enforce specific scheduling patterns.
1148ebc1afSAustin Kerbow // They should be used in cases where runtime performance considerations such as
1248ebc1afSAustin Kerbow // inter-wavefront interactions, mean that compile-time heuristics cannot
1348ebc1afSAustin Kerbow // predict the optimal instruction ordering, or in kernels where optimum
1448ebc1afSAustin Kerbow // instruction scheduling is important enough to warrant manual intervention.
1548ebc1afSAustin Kerbow //
1648ebc1afSAustin Kerbow //===----------------------------------------------------------------------===//
1748ebc1afSAustin Kerbow 
1848ebc1afSAustin Kerbow #include "AMDGPUIGroupLP.h"
1948ebc1afSAustin Kerbow #include "AMDGPUTargetMachine.h"
2048ebc1afSAustin Kerbow #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2148ebc1afSAustin Kerbow #include "SIInstrInfo.h"
2248ebc1afSAustin Kerbow #include "SIMachineFunctionInfo.h"
2348ebc1afSAustin Kerbow #include "llvm/ADT/BitmaskEnum.h"
2448ebc1afSAustin Kerbow #include "llvm/CodeGen/MachineScheduler.h"
2548ebc1afSAustin Kerbow #include "llvm/CodeGen/TargetOpcodes.h"
2648ebc1afSAustin Kerbow 
2748ebc1afSAustin Kerbow using namespace llvm;
2848ebc1afSAustin Kerbow 
2948ebc1afSAustin Kerbow #define DEBUG_TYPE "machine-scheduler"
3048ebc1afSAustin Kerbow 
3148ebc1afSAustin Kerbow namespace {
3248ebc1afSAustin Kerbow 
3348ebc1afSAustin Kerbow static cl::opt<bool>
3448ebc1afSAustin Kerbow     EnableIGroupLP("amdgpu-igrouplp",
3548ebc1afSAustin Kerbow                    cl::desc("Enable construction of Instruction Groups and "
3648ebc1afSAustin Kerbow                             "their ordering for scheduling"),
3748ebc1afSAustin Kerbow                    cl::init(false));
3848ebc1afSAustin Kerbow 
3948ebc1afSAustin Kerbow static cl::opt<Optional<unsigned>>
4048ebc1afSAustin Kerbow     VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None),
4148ebc1afSAustin Kerbow                      cl::Hidden,
4248ebc1afSAustin Kerbow                      cl::desc("The maximum number of instructions to include "
4348ebc1afSAustin Kerbow                               "in VMEM group."));
4448ebc1afSAustin Kerbow 
4548ebc1afSAustin Kerbow static cl::opt<Optional<unsigned>>
4648ebc1afSAustin Kerbow     MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None),
4748ebc1afSAustin Kerbow                      cl::Hidden,
4848ebc1afSAustin Kerbow                      cl::desc("The maximum number of instructions to include "
4948ebc1afSAustin Kerbow                               "in MFMA group."));
5048ebc1afSAustin Kerbow 
5148ebc1afSAustin Kerbow static cl::opt<Optional<unsigned>>
5248ebc1afSAustin Kerbow     LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
5348ebc1afSAustin Kerbow                     cl::Hidden,
5448ebc1afSAustin Kerbow                     cl::desc("The maximum number of instructions to include "
5548ebc1afSAustin Kerbow                              "in lds/gds read group."));
5648ebc1afSAustin Kerbow 
5748ebc1afSAustin Kerbow static cl::opt<Optional<unsigned>>
5848ebc1afSAustin Kerbow     LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
5948ebc1afSAustin Kerbow                     cl::Hidden,
6048ebc1afSAustin Kerbow                     cl::desc("The maximum number of instructions to include "
6148ebc1afSAustin Kerbow                              "in lds/gds write group."));
6248ebc1afSAustin Kerbow 
6348ebc1afSAustin Kerbow typedef function_ref<bool(const MachineInstr &, const SIInstrInfo *)>
6448ebc1afSAustin Kerbow     CanAddMIFn;
6548ebc1afSAustin Kerbow 
6648ebc1afSAustin Kerbow // Classify instructions into groups to enable fine tuned control over the
6748ebc1afSAustin Kerbow // scheduler. These groups may be more specific than current SchedModel
6848ebc1afSAustin Kerbow // instruction classes.
6948ebc1afSAustin Kerbow class SchedGroup {
7048ebc1afSAustin Kerbow private:
7148ebc1afSAustin Kerbow   // Function that returns true if a non-bundle MI may be inserted into this
7248ebc1afSAustin Kerbow   // group.
7348ebc1afSAustin Kerbow   const CanAddMIFn canAddMI;
7448ebc1afSAustin Kerbow 
7548ebc1afSAustin Kerbow   // Maximum number of SUnits that can be added to this group.
7648ebc1afSAustin Kerbow   Optional<unsigned> MaxSize;
7748ebc1afSAustin Kerbow 
7848ebc1afSAustin Kerbow   // Collection of SUnits that are classified as members of this group.
7948ebc1afSAustin Kerbow   SmallVector<SUnit *, 32> Collection;
8048ebc1afSAustin Kerbow 
8148ebc1afSAustin Kerbow   ScheduleDAGInstrs *DAG;
8248ebc1afSAustin Kerbow 
tryAddEdge(SUnit * A,SUnit * B)8348ebc1afSAustin Kerbow   void tryAddEdge(SUnit *A, SUnit *B) {
8448ebc1afSAustin Kerbow     if (A != B && DAG->canAddEdge(B, A)) {
8548ebc1afSAustin Kerbow       DAG->addEdge(B, SDep(A, SDep::Artificial));
8648ebc1afSAustin Kerbow       LLVM_DEBUG(dbgs() << "Adding edge...\n"
8748ebc1afSAustin Kerbow                         << "from: SU(" << A->NodeNum << ") " << *A->getInstr()
8848ebc1afSAustin Kerbow                         << "to: SU(" << B->NodeNum << ") " << *B->getInstr());
8948ebc1afSAustin Kerbow     }
9048ebc1afSAustin Kerbow   }
9148ebc1afSAustin Kerbow 
9248ebc1afSAustin Kerbow public:
9348ebc1afSAustin Kerbow   // Add DAG dependencies from all SUnits in this SchedGroup and this SU. If
9448ebc1afSAustin Kerbow   // MakePred is true, SU will be a predecessor of the SUnits in this
9548ebc1afSAustin Kerbow   // SchedGroup, otherwise SU will be a successor.
link(SUnit & SU,bool MakePred=false)9648ebc1afSAustin Kerbow   void link(SUnit &SU, bool MakePred = false) {
9748ebc1afSAustin Kerbow     for (auto A : Collection) {
9848ebc1afSAustin Kerbow       SUnit *B = &SU;
9948ebc1afSAustin Kerbow       if (MakePred)
10048ebc1afSAustin Kerbow         std::swap(A, B);
10148ebc1afSAustin Kerbow 
10248ebc1afSAustin Kerbow       tryAddEdge(A, B);
10348ebc1afSAustin Kerbow     }
10448ebc1afSAustin Kerbow   }
10548ebc1afSAustin Kerbow 
10648ebc1afSAustin Kerbow   // Add DAG dependencies from all SUnits in this SchedGroup and this SU. Use
10748ebc1afSAustin Kerbow   // the predicate to determine whether SU should be a predecessor (P = true)
10848ebc1afSAustin Kerbow   // or a successor (P = false) of this SchedGroup.
link(SUnit & SU,function_ref<bool (const SUnit * A,const SUnit * B)> P)10948ebc1afSAustin Kerbow   void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P) {
11048ebc1afSAustin Kerbow     for (auto A : Collection) {
11148ebc1afSAustin Kerbow       SUnit *B = &SU;
11248ebc1afSAustin Kerbow       if (P(A, B))
11348ebc1afSAustin Kerbow         std::swap(A, B);
11448ebc1afSAustin Kerbow 
11548ebc1afSAustin Kerbow       tryAddEdge(A, B);
11648ebc1afSAustin Kerbow     }
11748ebc1afSAustin Kerbow   }
11848ebc1afSAustin Kerbow 
11948ebc1afSAustin Kerbow   // Add DAG dependencies such that SUnits in this group shall be ordered
12048ebc1afSAustin Kerbow   // before SUnits in OtherGroup.
link(SchedGroup & OtherGroup)12148ebc1afSAustin Kerbow   void link(SchedGroup &OtherGroup) {
12248ebc1afSAustin Kerbow     for (auto B : OtherGroup.Collection)
12348ebc1afSAustin Kerbow       link(*B);
12448ebc1afSAustin Kerbow   }
12548ebc1afSAustin Kerbow 
12648ebc1afSAustin Kerbow   // Returns true if no more instructions may be added to this group.
isFull()127*064a08cdSKazu Hirata   bool isFull() { return MaxSize && Collection.size() >= *MaxSize; }
12848ebc1afSAustin Kerbow 
12948ebc1afSAustin Kerbow   // Returns true if SU can be added to this SchedGroup.
canAddSU(SUnit & SU,const SIInstrInfo * TII)13048ebc1afSAustin Kerbow   bool canAddSU(SUnit &SU, const SIInstrInfo *TII) {
13148ebc1afSAustin Kerbow     if (isFull())
13248ebc1afSAustin Kerbow       return false;
13348ebc1afSAustin Kerbow 
13448ebc1afSAustin Kerbow     MachineInstr &MI = *SU.getInstr();
13548ebc1afSAustin Kerbow     if (MI.getOpcode() != TargetOpcode::BUNDLE)
13648ebc1afSAustin Kerbow       return canAddMI(MI, TII);
13748ebc1afSAustin Kerbow 
13848ebc1afSAustin Kerbow     // Special case for bundled MIs.
13948ebc1afSAustin Kerbow     const MachineBasicBlock *MBB = MI.getParent();
14048ebc1afSAustin Kerbow     MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
14148ebc1afSAustin Kerbow     while (E != MBB->end() && E->isBundledWithPred())
14248ebc1afSAustin Kerbow       ++E;
14348ebc1afSAustin Kerbow 
14448ebc1afSAustin Kerbow     // Return true if all of the bundled MIs can be added to this group.
14548ebc1afSAustin Kerbow     return std::all_of(
14648ebc1afSAustin Kerbow         B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); });
14748ebc1afSAustin Kerbow   }
14848ebc1afSAustin Kerbow 
add(SUnit & SU)14948ebc1afSAustin Kerbow   void add(SUnit &SU) { Collection.push_back(&SU); }
15048ebc1afSAustin Kerbow 
SchedGroup(CanAddMIFn canAddMI,Optional<unsigned> MaxSize,ScheduleDAGInstrs * DAG)15148ebc1afSAustin Kerbow   SchedGroup(CanAddMIFn canAddMI, Optional<unsigned> MaxSize,
15248ebc1afSAustin Kerbow              ScheduleDAGInstrs *DAG)
15348ebc1afSAustin Kerbow       : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
15448ebc1afSAustin Kerbow };
15548ebc1afSAustin Kerbow 
isMFMASGMember(const MachineInstr & MI,const SIInstrInfo * TII)15648ebc1afSAustin Kerbow bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
15748ebc1afSAustin Kerbow   return TII->isMFMA(MI);
15848ebc1afSAustin Kerbow }
15948ebc1afSAustin Kerbow 
isVALUSGMember(const MachineInstr & MI,const SIInstrInfo * TII)16048ebc1afSAustin Kerbow bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
16148ebc1afSAustin Kerbow   return TII->isVALU(MI) && !TII->isMFMA(MI);
16248ebc1afSAustin Kerbow }
16348ebc1afSAustin Kerbow 
isSALUSGMember(const MachineInstr & MI,const SIInstrInfo * TII)16448ebc1afSAustin Kerbow bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
16548ebc1afSAustin Kerbow   return TII->isSALU(MI);
16648ebc1afSAustin Kerbow }
16748ebc1afSAustin Kerbow 
isVMEMSGMember(const MachineInstr & MI,const SIInstrInfo * TII)16848ebc1afSAustin Kerbow bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
16948ebc1afSAustin Kerbow   return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI));
17048ebc1afSAustin Kerbow }
17148ebc1afSAustin Kerbow 
isVMEMReadSGMember(const MachineInstr & MI,const SIInstrInfo * TII)17248ebc1afSAustin Kerbow bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
17348ebc1afSAustin Kerbow   return MI.mayLoad() &&
17448ebc1afSAustin Kerbow          (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
17548ebc1afSAustin Kerbow }
17648ebc1afSAustin Kerbow 
isVMEMWriteSGMember(const MachineInstr & MI,const SIInstrInfo * TII)17748ebc1afSAustin Kerbow bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
17848ebc1afSAustin Kerbow   return MI.mayStore() &&
17948ebc1afSAustin Kerbow          (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
18048ebc1afSAustin Kerbow }
18148ebc1afSAustin Kerbow 
isDSWriteSGMember(const MachineInstr & MI,const SIInstrInfo * TII)18248ebc1afSAustin Kerbow bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
18348ebc1afSAustin Kerbow   return MI.mayStore() && TII->isDS(MI);
18448ebc1afSAustin Kerbow }
18548ebc1afSAustin Kerbow 
isDSReadSGMember(const MachineInstr & MI,const SIInstrInfo * TII)18648ebc1afSAustin Kerbow bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
18748ebc1afSAustin Kerbow   return MI.mayLoad() && TII->isDS(MI);
18848ebc1afSAustin Kerbow }
18948ebc1afSAustin Kerbow 
19048ebc1afSAustin Kerbow class IGroupLPDAGMutation : public ScheduleDAGMutation {
19148ebc1afSAustin Kerbow public:
19248ebc1afSAustin Kerbow   const SIInstrInfo *TII;
19348ebc1afSAustin Kerbow   ScheduleDAGMI *DAG;
19448ebc1afSAustin Kerbow 
19548ebc1afSAustin Kerbow   IGroupLPDAGMutation() = default;
19648ebc1afSAustin Kerbow   void apply(ScheduleDAGInstrs *DAGInstrs) override;
19748ebc1afSAustin Kerbow };
19848ebc1afSAustin Kerbow 
19948ebc1afSAustin Kerbow // DAG mutation that coordinates with the SCHED_BARRIER instruction and
20048ebc1afSAustin Kerbow // corresponding builtin. The mutation adds edges from specific instruction
20148ebc1afSAustin Kerbow // classes determined by the SCHED_BARRIER mask so that they cannot be
20248ebc1afSAustin Kerbow // scheduled around the SCHED_BARRIER.
20348ebc1afSAustin Kerbow class SchedBarrierDAGMutation : public ScheduleDAGMutation {
20448ebc1afSAustin Kerbow private:
20548ebc1afSAustin Kerbow   const SIInstrInfo *TII;
20648ebc1afSAustin Kerbow 
20748ebc1afSAustin Kerbow   ScheduleDAGMI *DAG;
20848ebc1afSAustin Kerbow 
20948ebc1afSAustin Kerbow   // Components of the mask that determines which instructions may not be
21048ebc1afSAustin Kerbow   // scheduled across the SCHED_BARRIER.
21148ebc1afSAustin Kerbow   enum class SchedBarrierMasks {
21248ebc1afSAustin Kerbow     NONE = 0u,
21348ebc1afSAustin Kerbow     ALU = 1u << 0,
21448ebc1afSAustin Kerbow     VALU = 1u << 1,
21548ebc1afSAustin Kerbow     SALU = 1u << 2,
21648ebc1afSAustin Kerbow     MFMA = 1u << 3,
21748ebc1afSAustin Kerbow     VMEM = 1u << 4,
21848ebc1afSAustin Kerbow     VMEM_READ = 1u << 5,
21948ebc1afSAustin Kerbow     VMEM_WRITE = 1u << 6,
22048ebc1afSAustin Kerbow     DS = 1u << 7,
22148ebc1afSAustin Kerbow     DS_READ = 1u << 8,
22248ebc1afSAustin Kerbow     DS_WRITE = 1u << 9,
22348ebc1afSAustin Kerbow     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE)
22448ebc1afSAustin Kerbow   };
22548ebc1afSAustin Kerbow 
22648ebc1afSAustin Kerbow   // Cache SchedGroups of each type if we have multiple SCHED_BARRIERs in a
22748ebc1afSAustin Kerbow   // region.
22848ebc1afSAustin Kerbow   //
22948ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> MFMASchedGroup = nullptr;
23048ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> VALUSchedGroup = nullptr;
23148ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> SALUSchedGroup = nullptr;
23248ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> VMEMReadSchedGroup = nullptr;
23348ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> VMEMWriteSchedGroup = nullptr;
23448ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> DSWriteSchedGroup = nullptr;
23548ebc1afSAustin Kerbow   std::unique_ptr<SchedGroup> DSReadSchedGroup = nullptr;
23648ebc1afSAustin Kerbow 
23748ebc1afSAustin Kerbow   // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should
23848ebc1afSAustin Kerbow   // not be reordered accross the SCHED_BARRIER.
23948ebc1afSAustin Kerbow   void getSchedGroupsFromMask(int32_t Mask,
24048ebc1afSAustin Kerbow                               SmallVectorImpl<SchedGroup *> &SchedGroups);
24148ebc1afSAustin Kerbow 
24248ebc1afSAustin Kerbow   // Add DAG edges that enforce SCHED_BARRIER ordering.
24348ebc1afSAustin Kerbow   void addSchedBarrierEdges(SUnit &SU);
24448ebc1afSAustin Kerbow 
24548ebc1afSAustin Kerbow   // Classify instructions and add them to the SchedGroup.
24648ebc1afSAustin Kerbow   void initSchedGroup(SchedGroup *SG);
24748ebc1afSAustin Kerbow 
24848ebc1afSAustin Kerbow   // Remove all existing edges from a SCHED_BARRIER.
24948ebc1afSAustin Kerbow   void resetSchedBarrierEdges(SUnit &SU);
25048ebc1afSAustin Kerbow 
25148ebc1afSAustin Kerbow public:
25248ebc1afSAustin Kerbow   void apply(ScheduleDAGInstrs *DAGInstrs) override;
25348ebc1afSAustin Kerbow 
25448ebc1afSAustin Kerbow   SchedBarrierDAGMutation() = default;
25548ebc1afSAustin Kerbow };
25648ebc1afSAustin Kerbow 
apply(ScheduleDAGInstrs * DAGInstrs)25748ebc1afSAustin Kerbow void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
25848ebc1afSAustin Kerbow   const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
25948ebc1afSAustin Kerbow   TII = ST.getInstrInfo();
26048ebc1afSAustin Kerbow   DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
26148ebc1afSAustin Kerbow   const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
26248ebc1afSAustin Kerbow   if (!TSchedModel || DAG->SUnits.empty())
26348ebc1afSAustin Kerbow     return;
26448ebc1afSAustin Kerbow 
26548ebc1afSAustin Kerbow   LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n");
26648ebc1afSAustin Kerbow 
26748ebc1afSAustin Kerbow   // The order of InstructionGroups in this vector defines the
26848ebc1afSAustin Kerbow   // order in which edges will be added. In other words, given the
26948ebc1afSAustin Kerbow   // present ordering, we will try to make each VMEMRead instruction
27048ebc1afSAustin Kerbow   // a predecessor of each DSRead instruction, and so on.
27148ebc1afSAustin Kerbow   SmallVector<SchedGroup, 4> PipelineOrderGroups = {
2724bba8211SAustin Kerbow       SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG),
2734bba8211SAustin Kerbow       SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG),
2744bba8211SAustin Kerbow       SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG),
2754bba8211SAustin Kerbow       SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)};
27648ebc1afSAustin Kerbow 
27748ebc1afSAustin Kerbow   for (SUnit &SU : DAG->SUnits) {
27848ebc1afSAustin Kerbow     LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU));
27948ebc1afSAustin Kerbow     for (auto &SG : PipelineOrderGroups)
28048ebc1afSAustin Kerbow       if (SG.canAddSU(SU, TII))
28148ebc1afSAustin Kerbow         SG.add(SU);
28248ebc1afSAustin Kerbow   }
28348ebc1afSAustin Kerbow 
28448ebc1afSAustin Kerbow   for (unsigned i = 0; i < PipelineOrderGroups.size() - 1; i++) {
28548ebc1afSAustin Kerbow     auto &GroupA = PipelineOrderGroups[i];
28648ebc1afSAustin Kerbow     for (unsigned j = i + 1; j < PipelineOrderGroups.size(); j++) {
28748ebc1afSAustin Kerbow       auto &GroupB = PipelineOrderGroups[j];
28848ebc1afSAustin Kerbow       GroupA.link(GroupB);
28948ebc1afSAustin Kerbow     }
29048ebc1afSAustin Kerbow   }
29148ebc1afSAustin Kerbow }
29248ebc1afSAustin Kerbow 
apply(ScheduleDAGInstrs * DAGInstrs)29348ebc1afSAustin Kerbow void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
29448ebc1afSAustin Kerbow   const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
29548ebc1afSAustin Kerbow   if (!TSchedModel || DAGInstrs->SUnits.empty())
29648ebc1afSAustin Kerbow     return;
29748ebc1afSAustin Kerbow 
29848ebc1afSAustin Kerbow   LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n");
29948ebc1afSAustin Kerbow 
30048ebc1afSAustin Kerbow   const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
30148ebc1afSAustin Kerbow   TII = ST.getInstrInfo();
30248ebc1afSAustin Kerbow   DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
30348ebc1afSAustin Kerbow   for (auto &SU : DAG->SUnits)
30448ebc1afSAustin Kerbow     if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
30548ebc1afSAustin Kerbow       addSchedBarrierEdges(SU);
30648ebc1afSAustin Kerbow }
30748ebc1afSAustin Kerbow 
addSchedBarrierEdges(SUnit & SchedBarrier)30848ebc1afSAustin Kerbow void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
30948ebc1afSAustin Kerbow   MachineInstr &MI = *SchedBarrier.getInstr();
31048ebc1afSAustin Kerbow   assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
31148ebc1afSAustin Kerbow   // Remove all existing edges from the SCHED_BARRIER that were added due to the
31248ebc1afSAustin Kerbow   // instruction having side effects.
31348ebc1afSAustin Kerbow   resetSchedBarrierEdges(SchedBarrier);
31448ebc1afSAustin Kerbow   SmallVector<SchedGroup *, 4> SchedGroups;
31548ebc1afSAustin Kerbow   int32_t Mask = MI.getOperand(0).getImm();
31648ebc1afSAustin Kerbow   getSchedGroupsFromMask(Mask, SchedGroups);
31748ebc1afSAustin Kerbow   for (auto SG : SchedGroups)
31848ebc1afSAustin Kerbow     SG->link(
31948ebc1afSAustin Kerbow         SchedBarrier, (function_ref<bool(const SUnit *A, const SUnit *B)>)[](
32048ebc1afSAustin Kerbow                           const SUnit *A, const SUnit *B) {
32148ebc1afSAustin Kerbow           return A->NodeNum > B->NodeNum;
32248ebc1afSAustin Kerbow         });
32348ebc1afSAustin Kerbow }
32448ebc1afSAustin Kerbow 
getSchedGroupsFromMask(int32_t Mask,SmallVectorImpl<SchedGroup * > & SchedGroups)32548ebc1afSAustin Kerbow void SchedBarrierDAGMutation::getSchedGroupsFromMask(
32648ebc1afSAustin Kerbow     int32_t Mask, SmallVectorImpl<SchedGroup *> &SchedGroups) {
32748ebc1afSAustin Kerbow   SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask;
32848ebc1afSAustin Kerbow   // See IntrinsicsAMDGPU.td for an explanation of these masks and their
32948ebc1afSAustin Kerbow   // mappings.
33048ebc1afSAustin Kerbow   //
33148ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE &&
33248ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
33348ebc1afSAustin Kerbow     if (!VALUSchedGroup) {
33448ebc1afSAustin Kerbow       VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember, None, DAG);
33548ebc1afSAustin Kerbow       initSchedGroup(VALUSchedGroup.get());
33648ebc1afSAustin Kerbow     }
33748ebc1afSAustin Kerbow 
33848ebc1afSAustin Kerbow     SchedGroups.push_back(VALUSchedGroup.get());
33948ebc1afSAustin Kerbow   }
34048ebc1afSAustin Kerbow 
34148ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE &&
34248ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
34348ebc1afSAustin Kerbow     if (!SALUSchedGroup) {
34448ebc1afSAustin Kerbow       SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember, None, DAG);
34548ebc1afSAustin Kerbow       initSchedGroup(SALUSchedGroup.get());
34648ebc1afSAustin Kerbow     }
34748ebc1afSAustin Kerbow 
34848ebc1afSAustin Kerbow     SchedGroups.push_back(SALUSchedGroup.get());
34948ebc1afSAustin Kerbow   }
35048ebc1afSAustin Kerbow 
35148ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE &&
35248ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
35348ebc1afSAustin Kerbow     if (!MFMASchedGroup) {
35448ebc1afSAustin Kerbow       MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember, None, DAG);
35548ebc1afSAustin Kerbow       initSchedGroup(MFMASchedGroup.get());
35648ebc1afSAustin Kerbow     }
35748ebc1afSAustin Kerbow 
35848ebc1afSAustin Kerbow     SchedGroups.push_back(MFMASchedGroup.get());
35948ebc1afSAustin Kerbow   }
36048ebc1afSAustin Kerbow 
36148ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE &&
36248ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
36348ebc1afSAustin Kerbow     if (!VMEMReadSchedGroup) {
36448ebc1afSAustin Kerbow       VMEMReadSchedGroup =
36548ebc1afSAustin Kerbow           std::make_unique<SchedGroup>(isVMEMReadSGMember, None, DAG);
36648ebc1afSAustin Kerbow       initSchedGroup(VMEMReadSchedGroup.get());
36748ebc1afSAustin Kerbow     }
36848ebc1afSAustin Kerbow 
36948ebc1afSAustin Kerbow     SchedGroups.push_back(VMEMReadSchedGroup.get());
37048ebc1afSAustin Kerbow   }
37148ebc1afSAustin Kerbow 
37248ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE &&
37348ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
37448ebc1afSAustin Kerbow     if (!VMEMWriteSchedGroup) {
37548ebc1afSAustin Kerbow       VMEMWriteSchedGroup =
37648ebc1afSAustin Kerbow           std::make_unique<SchedGroup>(isVMEMWriteSGMember, None, DAG);
37748ebc1afSAustin Kerbow       initSchedGroup(VMEMWriteSchedGroup.get());
37848ebc1afSAustin Kerbow     }
37948ebc1afSAustin Kerbow 
38048ebc1afSAustin Kerbow     SchedGroups.push_back(VMEMWriteSchedGroup.get());
38148ebc1afSAustin Kerbow   }
38248ebc1afSAustin Kerbow 
38348ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE &&
38448ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
38548ebc1afSAustin Kerbow     if (!DSReadSchedGroup) {
38648ebc1afSAustin Kerbow       DSReadSchedGroup =
38748ebc1afSAustin Kerbow           std::make_unique<SchedGroup>(isDSReadSGMember, None, DAG);
38848ebc1afSAustin Kerbow       initSchedGroup(DSReadSchedGroup.get());
38948ebc1afSAustin Kerbow     }
39048ebc1afSAustin Kerbow 
39148ebc1afSAustin Kerbow     SchedGroups.push_back(DSReadSchedGroup.get());
39248ebc1afSAustin Kerbow   }
39348ebc1afSAustin Kerbow 
39448ebc1afSAustin Kerbow   if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE &&
39548ebc1afSAustin Kerbow       (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
39648ebc1afSAustin Kerbow     if (!DSWriteSchedGroup) {
39748ebc1afSAustin Kerbow       DSWriteSchedGroup =
39848ebc1afSAustin Kerbow           std::make_unique<SchedGroup>(isDSWriteSGMember, None, DAG);
39948ebc1afSAustin Kerbow       initSchedGroup(DSWriteSchedGroup.get());
40048ebc1afSAustin Kerbow     }
40148ebc1afSAustin Kerbow 
40248ebc1afSAustin Kerbow     SchedGroups.push_back(DSWriteSchedGroup.get());
40348ebc1afSAustin Kerbow   }
40448ebc1afSAustin Kerbow }
40548ebc1afSAustin Kerbow 
initSchedGroup(SchedGroup * SG)40648ebc1afSAustin Kerbow void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) {
40748ebc1afSAustin Kerbow   assert(SG);
40848ebc1afSAustin Kerbow   for (auto &SU : DAG->SUnits)
40948ebc1afSAustin Kerbow     if (SG->canAddSU(SU, TII))
41048ebc1afSAustin Kerbow       SG->add(SU);
41148ebc1afSAustin Kerbow }
41248ebc1afSAustin Kerbow 
resetSchedBarrierEdges(SUnit & SU)41348ebc1afSAustin Kerbow void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) {
41448ebc1afSAustin Kerbow   assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER);
41548ebc1afSAustin Kerbow   for (auto &P : SU.Preds)
41648ebc1afSAustin Kerbow     SU.removePred(P);
41748ebc1afSAustin Kerbow 
41848ebc1afSAustin Kerbow   for (auto &S : SU.Succs) {
41948ebc1afSAustin Kerbow     for (auto &SP : S.getSUnit()->Preds) {
42048ebc1afSAustin Kerbow       if (SP.getSUnit() == &SU) {
42148ebc1afSAustin Kerbow         S.getSUnit()->removePred(SP);
42248ebc1afSAustin Kerbow       }
42348ebc1afSAustin Kerbow     }
42448ebc1afSAustin Kerbow   }
42548ebc1afSAustin Kerbow }
42648ebc1afSAustin Kerbow 
42748ebc1afSAustin Kerbow } // namespace
42848ebc1afSAustin Kerbow 
42948ebc1afSAustin Kerbow namespace llvm {
43048ebc1afSAustin Kerbow 
createIGroupLPDAGMutation()43148ebc1afSAustin Kerbow std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
43248ebc1afSAustin Kerbow   return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() : nullptr;
43348ebc1afSAustin Kerbow }
43448ebc1afSAustin Kerbow 
createSchedBarrierDAGMutation()43548ebc1afSAustin Kerbow std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() {
43648ebc1afSAustin Kerbow   return std::make_unique<SchedBarrierDAGMutation>();
43748ebc1afSAustin Kerbow }
43848ebc1afSAustin Kerbow 
43948ebc1afSAustin Kerbow } // end namespace llvm
440