//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600 Machine Scheduler interface
//
//===----------------------------------------------------------------------===//
148f0fd8f6SDimitry Andric
158f0fd8f6SDimitry Andric #include "R600MachineScheduler.h"
168f0fd8f6SDimitry Andric #include "AMDGPUSubtarget.h"
17db17bf38SDimitry Andric #include "R600InstrInfo.h"
184ba319b5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
198f0fd8f6SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
208f0fd8f6SDimitry Andric #include "llvm/IR/LegacyPassManager.h"
21db17bf38SDimitry Andric #include "llvm/Pass.h"
228f0fd8f6SDimitry Andric #include "llvm/Support/raw_ostream.h"
238f0fd8f6SDimitry Andric
248f0fd8f6SDimitry Andric using namespace llvm;
258f0fd8f6SDimitry Andric
26c4394386SDimitry Andric #define DEBUG_TYPE "machine-scheduler"
278f0fd8f6SDimitry Andric
initialize(ScheduleDAGMI * dag)288f0fd8f6SDimitry Andric void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
298f0fd8f6SDimitry Andric assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
308f0fd8f6SDimitry Andric DAG = static_cast<ScheduleDAGMILive*>(dag);
313ca95b02SDimitry Andric const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
328f0fd8f6SDimitry Andric TII = static_cast<const R600InstrInfo*>(DAG->TII);
338f0fd8f6SDimitry Andric TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
348f0fd8f6SDimitry Andric VLIW5 = !ST.hasCaymanISA();
358f0fd8f6SDimitry Andric MRI = &DAG->MRI;
368f0fd8f6SDimitry Andric CurInstKind = IDOther;
378f0fd8f6SDimitry Andric CurEmitted = 0;
388f0fd8f6SDimitry Andric OccupedSlotsMask = 31;
398f0fd8f6SDimitry Andric InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
408f0fd8f6SDimitry Andric InstKindLimit[IDOther] = 32;
418f0fd8f6SDimitry Andric InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
428f0fd8f6SDimitry Andric AluInstCount = 0;
438f0fd8f6SDimitry Andric FetchInstCount = 0;
448f0fd8f6SDimitry Andric }
458f0fd8f6SDimitry Andric
MoveUnits(std::vector<SUnit * > & QSrc,std::vector<SUnit * > & QDst)468f0fd8f6SDimitry Andric void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
478f0fd8f6SDimitry Andric std::vector<SUnit *> &QDst)
488f0fd8f6SDimitry Andric {
498f0fd8f6SDimitry Andric QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
508f0fd8f6SDimitry Andric QSrc.clear();
518f0fd8f6SDimitry Andric }
528f0fd8f6SDimitry Andric
/// Returns how many wavefronts fit when each of them needs \p GPRCount
/// registers out of a budget of 248. The division is integral, so the result
/// is rounded down.
///
/// \p GPRCount is expected to be non-zero. Builds with assertions enabled
/// abort on zero; builds without them return the largest unsigned value
/// (registers impose no limit) instead of dividing by zero.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  // NOTE(review): 248 is presumably the number of GPRs shared between the
  // wavefronts - confirm against the hardware documentation.
  const unsigned GPRBudget = 248;

  assert(GPRCount && "GPRCount cannot be 0");
  // The assert above vanishes under NDEBUG; never reach the division with 0.
  if (GPRCount == 0)
    return ~0U;

  return GPRBudget / GPRCount;
}
578f0fd8f6SDimitry Andric
pickNode(bool & IsTopNode)588f0fd8f6SDimitry Andric SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
598f0fd8f6SDimitry Andric SUnit *SU = nullptr;
608f0fd8f6SDimitry Andric NextInstKind = IDOther;
618f0fd8f6SDimitry Andric
628f0fd8f6SDimitry Andric IsTopNode = false;
638f0fd8f6SDimitry Andric
648f0fd8f6SDimitry Andric // check if we might want to switch current clause type
658f0fd8f6SDimitry Andric bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
668f0fd8f6SDimitry Andric (Available[CurInstKind].empty());
678f0fd8f6SDimitry Andric bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
688f0fd8f6SDimitry Andric (!Available[IDFetch].empty() || !Available[IDOther].empty());
698f0fd8f6SDimitry Andric
708f0fd8f6SDimitry Andric if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
718f0fd8f6SDimitry Andric // We use the heuristic provided by AMD Accelerated Parallel Processing
728f0fd8f6SDimitry Andric // OpenCL Programming Guide :
738f0fd8f6SDimitry Andric // The approx. number of WF that allows TEX inst to hide ALU inst is :
748f0fd8f6SDimitry Andric // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
758f0fd8f6SDimitry Andric float ALUFetchRationEstimate =
768f0fd8f6SDimitry Andric (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
778f0fd8f6SDimitry Andric (FetchInstCount + Available[IDFetch].size());
788f0fd8f6SDimitry Andric if (ALUFetchRationEstimate == 0) {
798f0fd8f6SDimitry Andric AllowSwitchFromAlu = true;
808f0fd8f6SDimitry Andric } else {
818f0fd8f6SDimitry Andric unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
824ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
838f0fd8f6SDimitry Andric // We assume the local GPR requirements to be "dominated" by the requirement
848f0fd8f6SDimitry Andric // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
858f0fd8f6SDimitry Andric // after TEX are indeed likely to consume or generate values from/for the
868f0fd8f6SDimitry Andric // TEX clause.
878f0fd8f6SDimitry Andric // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
888f0fd8f6SDimitry Andric // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
898f0fd8f6SDimitry Andric // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
908f0fd8f6SDimitry Andric // (TODO : use RegisterPressure)
918f0fd8f6SDimitry Andric // If we are going too use too many GPR, we flush Fetch instruction to lower
928f0fd8f6SDimitry Andric // register pressure on 128 bits regs.
938f0fd8f6SDimitry Andric unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
948f0fd8f6SDimitry Andric if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
958f0fd8f6SDimitry Andric AllowSwitchFromAlu = true;
968f0fd8f6SDimitry Andric }
978f0fd8f6SDimitry Andric }
988f0fd8f6SDimitry Andric
998f0fd8f6SDimitry Andric if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
1008f0fd8f6SDimitry Andric (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
1018f0fd8f6SDimitry Andric // try to pick ALU
1028f0fd8f6SDimitry Andric SU = pickAlu();
1038f0fd8f6SDimitry Andric if (!SU && !PhysicalRegCopy.empty()) {
1048f0fd8f6SDimitry Andric SU = PhysicalRegCopy.front();
1058f0fd8f6SDimitry Andric PhysicalRegCopy.erase(PhysicalRegCopy.begin());
1068f0fd8f6SDimitry Andric }
1078f0fd8f6SDimitry Andric if (SU) {
1088f0fd8f6SDimitry Andric if (CurEmitted >= InstKindLimit[IDAlu])
1098f0fd8f6SDimitry Andric CurEmitted = 0;
1108f0fd8f6SDimitry Andric NextInstKind = IDAlu;
1118f0fd8f6SDimitry Andric }
1128f0fd8f6SDimitry Andric }
1138f0fd8f6SDimitry Andric
1148f0fd8f6SDimitry Andric if (!SU) {
1158f0fd8f6SDimitry Andric // try to pick FETCH
1168f0fd8f6SDimitry Andric SU = pickOther(IDFetch);
1178f0fd8f6SDimitry Andric if (SU)
1188f0fd8f6SDimitry Andric NextInstKind = IDFetch;
1198f0fd8f6SDimitry Andric }
1208f0fd8f6SDimitry Andric
1218f0fd8f6SDimitry Andric // try to pick other
1228f0fd8f6SDimitry Andric if (!SU) {
1238f0fd8f6SDimitry Andric SU = pickOther(IDOther);
1248f0fd8f6SDimitry Andric if (SU)
1258f0fd8f6SDimitry Andric NextInstKind = IDOther;
1268f0fd8f6SDimitry Andric }
1278f0fd8f6SDimitry Andric
1284ba319b5SDimitry Andric LLVM_DEBUG(if (SU) {
1298f0fd8f6SDimitry Andric dbgs() << " ** Pick node **\n";
130*b5893f02SDimitry Andric DAG->dumpNode(*SU);
1318f0fd8f6SDimitry Andric } else {
1328f0fd8f6SDimitry Andric dbgs() << "NO NODE \n";
1338f0fd8f6SDimitry Andric for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
1348f0fd8f6SDimitry Andric const SUnit &S = DAG->SUnits[i];
1358f0fd8f6SDimitry Andric if (!S.isScheduled)
136*b5893f02SDimitry Andric DAG->dumpNode(S);
1378f0fd8f6SDimitry Andric }
1384ba319b5SDimitry Andric });
1398f0fd8f6SDimitry Andric
1408f0fd8f6SDimitry Andric return SU;
1418f0fd8f6SDimitry Andric }
1428f0fd8f6SDimitry Andric
schedNode(SUnit * SU,bool IsTopNode)1438f0fd8f6SDimitry Andric void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
1448f0fd8f6SDimitry Andric if (NextInstKind != CurInstKind) {
1454ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
1468f0fd8f6SDimitry Andric if (NextInstKind != IDAlu)
1478f0fd8f6SDimitry Andric OccupedSlotsMask |= 31;
1488f0fd8f6SDimitry Andric CurEmitted = 0;
1498f0fd8f6SDimitry Andric CurInstKind = NextInstKind;
1508f0fd8f6SDimitry Andric }
1518f0fd8f6SDimitry Andric
1528f0fd8f6SDimitry Andric if (CurInstKind == IDAlu) {
1538f0fd8f6SDimitry Andric AluInstCount ++;
1548f0fd8f6SDimitry Andric switch (getAluKind(SU)) {
1558f0fd8f6SDimitry Andric case AluT_XYZW:
1568f0fd8f6SDimitry Andric CurEmitted += 4;
1578f0fd8f6SDimitry Andric break;
1588f0fd8f6SDimitry Andric case AluDiscarded:
1598f0fd8f6SDimitry Andric break;
1608f0fd8f6SDimitry Andric default: {
1618f0fd8f6SDimitry Andric ++CurEmitted;
1628f0fd8f6SDimitry Andric for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
1638f0fd8f6SDimitry Andric E = SU->getInstr()->operands_end(); It != E; ++It) {
1648f0fd8f6SDimitry Andric MachineOperand &MO = *It;
1654ba319b5SDimitry Andric if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
1668f0fd8f6SDimitry Andric ++CurEmitted;
1678f0fd8f6SDimitry Andric }
1688f0fd8f6SDimitry Andric }
1698f0fd8f6SDimitry Andric }
1708f0fd8f6SDimitry Andric } else {
1718f0fd8f6SDimitry Andric ++CurEmitted;
1728f0fd8f6SDimitry Andric }
1738f0fd8f6SDimitry Andric
1744ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
1758f0fd8f6SDimitry Andric
1768f0fd8f6SDimitry Andric if (CurInstKind != IDFetch) {
1778f0fd8f6SDimitry Andric MoveUnits(Pending[IDFetch], Available[IDFetch]);
1788f0fd8f6SDimitry Andric } else
1798f0fd8f6SDimitry Andric FetchInstCount++;
1808f0fd8f6SDimitry Andric }
1818f0fd8f6SDimitry Andric
1828f0fd8f6SDimitry Andric static bool
isPhysicalRegCopy(MachineInstr * MI)1838f0fd8f6SDimitry Andric isPhysicalRegCopy(MachineInstr *MI) {
1844ba319b5SDimitry Andric if (MI->getOpcode() != R600::COPY)
1858f0fd8f6SDimitry Andric return false;
1868f0fd8f6SDimitry Andric
1878f0fd8f6SDimitry Andric return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
1888f0fd8f6SDimitry Andric }
1898f0fd8f6SDimitry Andric
releaseTopNode(SUnit * SU)1908f0fd8f6SDimitry Andric void R600SchedStrategy::releaseTopNode(SUnit *SU) {
191*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
1928f0fd8f6SDimitry Andric }
1938f0fd8f6SDimitry Andric
releaseBottomNode(SUnit * SU)1948f0fd8f6SDimitry Andric void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
195*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
1968f0fd8f6SDimitry Andric if (isPhysicalRegCopy(SU->getInstr())) {
1978f0fd8f6SDimitry Andric PhysicalRegCopy.push_back(SU);
1988f0fd8f6SDimitry Andric return;
1998f0fd8f6SDimitry Andric }
2008f0fd8f6SDimitry Andric
2018f0fd8f6SDimitry Andric int IK = getInstKind(SU);
2028f0fd8f6SDimitry Andric
2038f0fd8f6SDimitry Andric // There is no export clause, we can schedule one as soon as its ready
2048f0fd8f6SDimitry Andric if (IK == IDOther)
2058f0fd8f6SDimitry Andric Available[IDOther].push_back(SU);
2068f0fd8f6SDimitry Andric else
2078f0fd8f6SDimitry Andric Pending[IK].push_back(SU);
2088f0fd8f6SDimitry Andric
2098f0fd8f6SDimitry Andric }
2108f0fd8f6SDimitry Andric
regBelongsToClass(unsigned Reg,const TargetRegisterClass * RC) const2118f0fd8f6SDimitry Andric bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
2128f0fd8f6SDimitry Andric const TargetRegisterClass *RC) const {
2138f0fd8f6SDimitry Andric if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
2148f0fd8f6SDimitry Andric return RC->contains(Reg);
2158f0fd8f6SDimitry Andric } else {
2168f0fd8f6SDimitry Andric return MRI->getRegClass(Reg) == RC;
2178f0fd8f6SDimitry Andric }
2188f0fd8f6SDimitry Andric }
2198f0fd8f6SDimitry Andric
getAluKind(SUnit * SU) const2208f0fd8f6SDimitry Andric R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
2218f0fd8f6SDimitry Andric MachineInstr *MI = SU->getInstr();
2228f0fd8f6SDimitry Andric
2233ca95b02SDimitry Andric if (TII->isTransOnly(*MI))
2248f0fd8f6SDimitry Andric return AluTrans;
2258f0fd8f6SDimitry Andric
2268f0fd8f6SDimitry Andric switch (MI->getOpcode()) {
2274ba319b5SDimitry Andric case R600::PRED_X:
2288f0fd8f6SDimitry Andric return AluPredX;
2294ba319b5SDimitry Andric case R600::INTERP_PAIR_XY:
2304ba319b5SDimitry Andric case R600::INTERP_PAIR_ZW:
2314ba319b5SDimitry Andric case R600::INTERP_VEC_LOAD:
2324ba319b5SDimitry Andric case R600::DOT_4:
2338f0fd8f6SDimitry Andric return AluT_XYZW;
2344ba319b5SDimitry Andric case R600::COPY:
2358f0fd8f6SDimitry Andric if (MI->getOperand(1).isUndef()) {
2368f0fd8f6SDimitry Andric // MI will become a KILL, don't considers it in scheduling
2378f0fd8f6SDimitry Andric return AluDiscarded;
2388f0fd8f6SDimitry Andric }
239*b5893f02SDimitry Andric break;
2408f0fd8f6SDimitry Andric default:
2418f0fd8f6SDimitry Andric break;
2428f0fd8f6SDimitry Andric }
2438f0fd8f6SDimitry Andric
2448f0fd8f6SDimitry Andric // Does the instruction take a whole IG ?
2458f0fd8f6SDimitry Andric // XXX: Is it possible to add a helper function in R600InstrInfo that can
2468f0fd8f6SDimitry Andric // be used here and in R600PacketizerList::isSoloInstruction() ?
2478f0fd8f6SDimitry Andric if(TII->isVector(*MI) ||
2488f0fd8f6SDimitry Andric TII->isCubeOp(MI->getOpcode()) ||
2498f0fd8f6SDimitry Andric TII->isReductionOp(MI->getOpcode()) ||
2504ba319b5SDimitry Andric MI->getOpcode() == R600::GROUP_BARRIER) {
2518f0fd8f6SDimitry Andric return AluT_XYZW;
2528f0fd8f6SDimitry Andric }
2538f0fd8f6SDimitry Andric
2548f0fd8f6SDimitry Andric if (TII->isLDSInstr(MI->getOpcode())) {
2558f0fd8f6SDimitry Andric return AluT_X;
2568f0fd8f6SDimitry Andric }
2578f0fd8f6SDimitry Andric
2588f0fd8f6SDimitry Andric // Is the result already assigned to a channel ?
2598f0fd8f6SDimitry Andric unsigned DestSubReg = MI->getOperand(0).getSubReg();
2608f0fd8f6SDimitry Andric switch (DestSubReg) {
2614ba319b5SDimitry Andric case R600::sub0:
2628f0fd8f6SDimitry Andric return AluT_X;
2634ba319b5SDimitry Andric case R600::sub1:
2648f0fd8f6SDimitry Andric return AluT_Y;
2654ba319b5SDimitry Andric case R600::sub2:
2668f0fd8f6SDimitry Andric return AluT_Z;
2674ba319b5SDimitry Andric case R600::sub3:
2688f0fd8f6SDimitry Andric return AluT_W;
2698f0fd8f6SDimitry Andric default:
2708f0fd8f6SDimitry Andric break;
2718f0fd8f6SDimitry Andric }
2728f0fd8f6SDimitry Andric
2738f0fd8f6SDimitry Andric // Is the result already member of a X/Y/Z/W class ?
2748f0fd8f6SDimitry Andric unsigned DestReg = MI->getOperand(0).getReg();
2754ba319b5SDimitry Andric if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
2764ba319b5SDimitry Andric regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
2778f0fd8f6SDimitry Andric return AluT_X;
2784ba319b5SDimitry Andric if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
2798f0fd8f6SDimitry Andric return AluT_Y;
2804ba319b5SDimitry Andric if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
2818f0fd8f6SDimitry Andric return AluT_Z;
2824ba319b5SDimitry Andric if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
2838f0fd8f6SDimitry Andric return AluT_W;
2844ba319b5SDimitry Andric if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
2858f0fd8f6SDimitry Andric return AluT_XYZW;
2868f0fd8f6SDimitry Andric
2878f0fd8f6SDimitry Andric // LDS src registers cannot be used in the Trans slot.
2883ca95b02SDimitry Andric if (TII->readsLDSSrcReg(*MI))
2898f0fd8f6SDimitry Andric return AluT_XYZW;
2908f0fd8f6SDimitry Andric
2918f0fd8f6SDimitry Andric return AluAny;
2928f0fd8f6SDimitry Andric }
2938f0fd8f6SDimitry Andric
getInstKind(SUnit * SU)2948f0fd8f6SDimitry Andric int R600SchedStrategy::getInstKind(SUnit* SU) {
2958f0fd8f6SDimitry Andric int Opcode = SU->getInstr()->getOpcode();
2968f0fd8f6SDimitry Andric
2978f0fd8f6SDimitry Andric if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
2988f0fd8f6SDimitry Andric return IDFetch;
2998f0fd8f6SDimitry Andric
3008f0fd8f6SDimitry Andric if (TII->isALUInstr(Opcode)) {
3018f0fd8f6SDimitry Andric return IDAlu;
3028f0fd8f6SDimitry Andric }
3038f0fd8f6SDimitry Andric
3048f0fd8f6SDimitry Andric switch (Opcode) {
3054ba319b5SDimitry Andric case R600::PRED_X:
3064ba319b5SDimitry Andric case R600::COPY:
3074ba319b5SDimitry Andric case R600::CONST_COPY:
3084ba319b5SDimitry Andric case R600::INTERP_PAIR_XY:
3094ba319b5SDimitry Andric case R600::INTERP_PAIR_ZW:
3104ba319b5SDimitry Andric case R600::INTERP_VEC_LOAD:
3114ba319b5SDimitry Andric case R600::DOT_4:
3128f0fd8f6SDimitry Andric return IDAlu;
3138f0fd8f6SDimitry Andric default:
3148f0fd8f6SDimitry Andric return IDOther;
3158f0fd8f6SDimitry Andric }
3168f0fd8f6SDimitry Andric }
3178f0fd8f6SDimitry Andric
PopInst(std::vector<SUnit * > & Q,bool AnyALU)3188f0fd8f6SDimitry Andric SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
3198f0fd8f6SDimitry Andric if (Q.empty())
3208f0fd8f6SDimitry Andric return nullptr;
3218f0fd8f6SDimitry Andric for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
3228f0fd8f6SDimitry Andric It != E; ++It) {
3238f0fd8f6SDimitry Andric SUnit *SU = *It;
3248f0fd8f6SDimitry Andric InstructionsGroupCandidate.push_back(SU->getInstr());
3253ca95b02SDimitry Andric if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
3263ca95b02SDimitry Andric (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) {
3278f0fd8f6SDimitry Andric InstructionsGroupCandidate.pop_back();
3288f0fd8f6SDimitry Andric Q.erase((It + 1).base());
3298f0fd8f6SDimitry Andric return SU;
3308f0fd8f6SDimitry Andric } else {
3318f0fd8f6SDimitry Andric InstructionsGroupCandidate.pop_back();
3328f0fd8f6SDimitry Andric }
3338f0fd8f6SDimitry Andric }
3348f0fd8f6SDimitry Andric return nullptr;
3358f0fd8f6SDimitry Andric }
3368f0fd8f6SDimitry Andric
LoadAlu()3378f0fd8f6SDimitry Andric void R600SchedStrategy::LoadAlu() {
3388f0fd8f6SDimitry Andric std::vector<SUnit *> &QSrc = Pending[IDAlu];
3398f0fd8f6SDimitry Andric for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
3408f0fd8f6SDimitry Andric AluKind AK = getAluKind(QSrc[i]);
3418f0fd8f6SDimitry Andric AvailableAlus[AK].push_back(QSrc[i]);
3428f0fd8f6SDimitry Andric }
3438f0fd8f6SDimitry Andric QSrc.clear();
3448f0fd8f6SDimitry Andric }
3458f0fd8f6SDimitry Andric
PrepareNextSlot()3468f0fd8f6SDimitry Andric void R600SchedStrategy::PrepareNextSlot() {
3474ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "New Slot\n");
3488f0fd8f6SDimitry Andric assert (OccupedSlotsMask && "Slot wasn't filled");
3498f0fd8f6SDimitry Andric OccupedSlotsMask = 0;
3504ba319b5SDimitry Andric // if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
3518f0fd8f6SDimitry Andric // OccupedSlotsMask |= 16;
3528f0fd8f6SDimitry Andric InstructionsGroupCandidate.clear();
3538f0fd8f6SDimitry Andric LoadAlu();
3548f0fd8f6SDimitry Andric }
3558f0fd8f6SDimitry Andric
AssignSlot(MachineInstr * MI,unsigned Slot)3568f0fd8f6SDimitry Andric void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
3574ba319b5SDimitry Andric int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
3588f0fd8f6SDimitry Andric if (DstIndex == -1) {
3598f0fd8f6SDimitry Andric return;
3608f0fd8f6SDimitry Andric }
3618f0fd8f6SDimitry Andric unsigned DestReg = MI->getOperand(DstIndex).getReg();
3628f0fd8f6SDimitry Andric // PressureRegister crashes if an operand is def and used in the same inst
3638f0fd8f6SDimitry Andric // and we try to constraint its regclass
3648f0fd8f6SDimitry Andric for (MachineInstr::mop_iterator It = MI->operands_begin(),
3658f0fd8f6SDimitry Andric E = MI->operands_end(); It != E; ++It) {
3668f0fd8f6SDimitry Andric MachineOperand &MO = *It;
3678f0fd8f6SDimitry Andric if (MO.isReg() && !MO.isDef() &&
3688f0fd8f6SDimitry Andric MO.getReg() == DestReg)
3698f0fd8f6SDimitry Andric return;
3708f0fd8f6SDimitry Andric }
3718f0fd8f6SDimitry Andric // Constrains the regclass of DestReg to assign it to Slot
3728f0fd8f6SDimitry Andric switch (Slot) {
3738f0fd8f6SDimitry Andric case 0:
3744ba319b5SDimitry Andric MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
3758f0fd8f6SDimitry Andric break;
3768f0fd8f6SDimitry Andric case 1:
3774ba319b5SDimitry Andric MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
3788f0fd8f6SDimitry Andric break;
3798f0fd8f6SDimitry Andric case 2:
3804ba319b5SDimitry Andric MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
3818f0fd8f6SDimitry Andric break;
3828f0fd8f6SDimitry Andric case 3:
3834ba319b5SDimitry Andric MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
3848f0fd8f6SDimitry Andric break;
3858f0fd8f6SDimitry Andric }
3868f0fd8f6SDimitry Andric }
3878f0fd8f6SDimitry Andric
AttemptFillSlot(unsigned Slot,bool AnyAlu)3888f0fd8f6SDimitry Andric SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
3898f0fd8f6SDimitry Andric static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
3908f0fd8f6SDimitry Andric SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
3918f0fd8f6SDimitry Andric if (SlotedSU)
3928f0fd8f6SDimitry Andric return SlotedSU;
3938f0fd8f6SDimitry Andric SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
3948f0fd8f6SDimitry Andric if (UnslotedSU)
3958f0fd8f6SDimitry Andric AssignSlot(UnslotedSU->getInstr(), Slot);
3968f0fd8f6SDimitry Andric return UnslotedSU;
3978f0fd8f6SDimitry Andric }
3988f0fd8f6SDimitry Andric
AvailablesAluCount() const3998f0fd8f6SDimitry Andric unsigned R600SchedStrategy::AvailablesAluCount() const {
4008f0fd8f6SDimitry Andric return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
4018f0fd8f6SDimitry Andric AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
4028f0fd8f6SDimitry Andric AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
4038f0fd8f6SDimitry Andric AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
4048f0fd8f6SDimitry Andric AvailableAlus[AluPredX].size();
4058f0fd8f6SDimitry Andric }
4068f0fd8f6SDimitry Andric
pickAlu()4078f0fd8f6SDimitry Andric SUnit* R600SchedStrategy::pickAlu() {
4088f0fd8f6SDimitry Andric while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
4098f0fd8f6SDimitry Andric if (!OccupedSlotsMask) {
4108f0fd8f6SDimitry Andric // Bottom up scheduling : predX must comes first
4118f0fd8f6SDimitry Andric if (!AvailableAlus[AluPredX].empty()) {
4128f0fd8f6SDimitry Andric OccupedSlotsMask |= 31;
4138f0fd8f6SDimitry Andric return PopInst(AvailableAlus[AluPredX], false);
4148f0fd8f6SDimitry Andric }
4158f0fd8f6SDimitry Andric // Flush physical reg copies (RA will discard them)
4168f0fd8f6SDimitry Andric if (!AvailableAlus[AluDiscarded].empty()) {
4178f0fd8f6SDimitry Andric OccupedSlotsMask |= 31;
4188f0fd8f6SDimitry Andric return PopInst(AvailableAlus[AluDiscarded], false);
4198f0fd8f6SDimitry Andric }
4208f0fd8f6SDimitry Andric // If there is a T_XYZW alu available, use it
4218f0fd8f6SDimitry Andric if (!AvailableAlus[AluT_XYZW].empty()) {
4228f0fd8f6SDimitry Andric OccupedSlotsMask |= 15;
4238f0fd8f6SDimitry Andric return PopInst(AvailableAlus[AluT_XYZW], false);
4248f0fd8f6SDimitry Andric }
4258f0fd8f6SDimitry Andric }
4268f0fd8f6SDimitry Andric bool TransSlotOccuped = OccupedSlotsMask & 16;
4278f0fd8f6SDimitry Andric if (!TransSlotOccuped && VLIW5) {
4288f0fd8f6SDimitry Andric if (!AvailableAlus[AluTrans].empty()) {
4298f0fd8f6SDimitry Andric OccupedSlotsMask |= 16;
4308f0fd8f6SDimitry Andric return PopInst(AvailableAlus[AluTrans], false);
4318f0fd8f6SDimitry Andric }
4328f0fd8f6SDimitry Andric SUnit *SU = AttemptFillSlot(3, true);
4338f0fd8f6SDimitry Andric if (SU) {
4348f0fd8f6SDimitry Andric OccupedSlotsMask |= 16;
4358f0fd8f6SDimitry Andric return SU;
4368f0fd8f6SDimitry Andric }
4378f0fd8f6SDimitry Andric }
4388f0fd8f6SDimitry Andric for (int Chan = 3; Chan > -1; --Chan) {
4398f0fd8f6SDimitry Andric bool isOccupied = OccupedSlotsMask & (1 << Chan);
4408f0fd8f6SDimitry Andric if (!isOccupied) {
4418f0fd8f6SDimitry Andric SUnit *SU = AttemptFillSlot(Chan, false);
4428f0fd8f6SDimitry Andric if (SU) {
4438f0fd8f6SDimitry Andric OccupedSlotsMask |= (1 << Chan);
4448f0fd8f6SDimitry Andric InstructionsGroupCandidate.push_back(SU->getInstr());
4458f0fd8f6SDimitry Andric return SU;
4468f0fd8f6SDimitry Andric }
4478f0fd8f6SDimitry Andric }
4488f0fd8f6SDimitry Andric }
4498f0fd8f6SDimitry Andric PrepareNextSlot();
4508f0fd8f6SDimitry Andric }
4518f0fd8f6SDimitry Andric return nullptr;
4528f0fd8f6SDimitry Andric }
4538f0fd8f6SDimitry Andric
pickOther(int QID)4548f0fd8f6SDimitry Andric SUnit* R600SchedStrategy::pickOther(int QID) {
4558f0fd8f6SDimitry Andric SUnit *SU = nullptr;
4568f0fd8f6SDimitry Andric std::vector<SUnit *> &AQ = Available[QID];
4578f0fd8f6SDimitry Andric
4588f0fd8f6SDimitry Andric if (AQ.empty()) {
4598f0fd8f6SDimitry Andric MoveUnits(Pending[QID], AQ);
4608f0fd8f6SDimitry Andric }
4618f0fd8f6SDimitry Andric if (!AQ.empty()) {
4628f0fd8f6SDimitry Andric SU = AQ.back();
4634ba319b5SDimitry Andric AQ.pop_back();
4648f0fd8f6SDimitry Andric }
4658f0fd8f6SDimitry Andric return SU;
4668f0fd8f6SDimitry Andric }
467