10b57cec5SDimitry Andric //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // The machine combiner pass uses machine trace metrics to ensure the combined
100b57cec5SDimitry Andric // instructions do not lengthen the critical path or the resource depth.
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
140b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
15480093f4SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
16480093f4SDimitry Andric #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
17bdd1243dSDimitry Andric #include "llvm/CodeGen/MachineCombinerPattern.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
23480093f4SDimitry Andric #include "llvm/CodeGen/MachineSizeOpts.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h"
25e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
30480093f4SDimitry Andric #include "llvm/InitializePasses.h"
310b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
320b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
330b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #define DEBUG_TYPE "machine-combiner"
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric STATISTIC(NumInstCombined, "Number of machineinst combined");
400b57cec5SDimitry Andric
410b57cec5SDimitry Andric static cl::opt<unsigned>
420b57cec5SDimitry Andric inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
430b57cec5SDimitry Andric cl::desc("Incremental depth computation will be used for basic "
440b57cec5SDimitry Andric "blocks with more instructions."), cl::init(500));
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
470b57cec5SDimitry Andric cl::desc("Dump all substituted intrs"),
480b57cec5SDimitry Andric cl::init(false));
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
510b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
520b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden,
530b57cec5SDimitry Andric cl::desc(
540b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"),
550b57cec5SDimitry Andric cl::init(true));
560b57cec5SDimitry Andric #else
570b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
580b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden,
590b57cec5SDimitry Andric cl::desc(
600b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"),
610b57cec5SDimitry Andric cl::init(false));
620b57cec5SDimitry Andric #endif
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric namespace {
650b57cec5SDimitry Andric class MachineCombiner : public MachineFunctionPass {
66*fe013be4SDimitry Andric const TargetSubtargetInfo *STI = nullptr;
67*fe013be4SDimitry Andric const TargetInstrInfo *TII = nullptr;
68*fe013be4SDimitry Andric const TargetRegisterInfo *TRI = nullptr;
690b57cec5SDimitry Andric MCSchedModel SchedModel;
70*fe013be4SDimitry Andric MachineRegisterInfo *MRI = nullptr;
71*fe013be4SDimitry Andric MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
72*fe013be4SDimitry Andric MachineTraceMetrics *Traces = nullptr;
73*fe013be4SDimitry Andric MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr;
74*fe013be4SDimitry Andric MachineBlockFrequencyInfo *MBFI = nullptr;
75*fe013be4SDimitry Andric ProfileSummaryInfo *PSI = nullptr;
76e8d8bef9SDimitry Andric RegisterClassInfo RegClassInfo;
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric TargetSchedModel TSchedModel;
790b57cec5SDimitry Andric
800b57cec5SDimitry Andric /// True if optimizing for code size.
81*fe013be4SDimitry Andric bool OptSize = false;
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric public:
840b57cec5SDimitry Andric static char ID;
MachineCombiner()850b57cec5SDimitry Andric MachineCombiner() : MachineFunctionPass(ID) {
860b57cec5SDimitry Andric initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
890b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const900b57cec5SDimitry Andric StringRef getPassName() const override { return "Machine InstCombiner"; }
910b57cec5SDimitry Andric
920b57cec5SDimitry Andric private:
930b57cec5SDimitry Andric bool combineInstructions(MachineBasicBlock *);
940b57cec5SDimitry Andric MachineInstr *getOperandDef(const MachineOperand &MO);
95fcaf7f86SDimitry Andric bool isTransientMI(const MachineInstr *MI);
960b57cec5SDimitry Andric unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
970b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
98*fe013be4SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
99*fe013be4SDimitry Andric const MachineBasicBlock &MBB);
1000b57cec5SDimitry Andric unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
1010b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace);
1020b57cec5SDimitry Andric bool
1030b57cec5SDimitry Andric improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
1040b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
1050b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1060b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
1070b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
1080b57cec5SDimitry Andric MachineCombinerPattern Pattern, bool SlackIsAccurate);
109e8d8bef9SDimitry Andric bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
110e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
111e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
112e8d8bef9SDimitry Andric MachineCombinerPattern Pattern);
1130b57cec5SDimitry Andric bool preservesResourceLen(MachineBasicBlock *MBB,
1140b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
1150b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1160b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs);
1170b57cec5SDimitry Andric void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
1180b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
1190b57cec5SDimitry Andric std::pair<unsigned, unsigned>
1200b57cec5SDimitry Andric getLatenciesForInstrSequences(MachineInstr &MI,
1210b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1220b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
1230b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace);
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
1260b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> &Patterns);
1270b57cec5SDimitry Andric };
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric
1300b57cec5SDimitry Andric char MachineCombiner::ID = 0;
1310b57cec5SDimitry Andric char &llvm::MachineCombinerID = MachineCombiner::ID;
1320b57cec5SDimitry Andric
1330b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
1340b57cec5SDimitry Andric "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)1350b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1360b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
1370b57cec5SDimitry Andric INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
1380b57cec5SDimitry Andric false, false)
1390b57cec5SDimitry Andric
1400b57cec5SDimitry Andric void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
1410b57cec5SDimitry Andric AU.setPreservesCFG();
1420b57cec5SDimitry Andric AU.addPreserved<MachineDominatorTree>();
1430b57cec5SDimitry Andric AU.addRequired<MachineLoopInfo>();
1440b57cec5SDimitry Andric AU.addPreserved<MachineLoopInfo>();
1450b57cec5SDimitry Andric AU.addRequired<MachineTraceMetrics>();
1460b57cec5SDimitry Andric AU.addPreserved<MachineTraceMetrics>();
147480093f4SDimitry Andric AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
148480093f4SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
1490b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric
152*fe013be4SDimitry Andric MachineInstr *
getOperandDef(const MachineOperand & MO)153*fe013be4SDimitry Andric MachineCombiner::getOperandDef(const MachineOperand &MO) {
1540b57cec5SDimitry Andric MachineInstr *DefInstr = nullptr;
1550b57cec5SDimitry Andric // We need a virtual register definition.
156bdd1243dSDimitry Andric if (MO.isReg() && MO.getReg().isVirtual())
1570b57cec5SDimitry Andric DefInstr = MRI->getUniqueVRegDef(MO.getReg());
1580b57cec5SDimitry Andric // PHI's have no depth etc.
1590b57cec5SDimitry Andric if (DefInstr && DefInstr->isPHI())
1600b57cec5SDimitry Andric DefInstr = nullptr;
1610b57cec5SDimitry Andric return DefInstr;
1620b57cec5SDimitry Andric }
1630b57cec5SDimitry Andric
164fcaf7f86SDimitry Andric /// Return true if MI is unlikely to generate an actual target instruction.
isTransientMI(const MachineInstr * MI)165fcaf7f86SDimitry Andric bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
166fcaf7f86SDimitry Andric if (!MI->isCopy())
167fcaf7f86SDimitry Andric return MI->isTransient();
168fcaf7f86SDimitry Andric
169fcaf7f86SDimitry Andric // If MI is a COPY, check if its src and dst registers can be coalesced.
170fcaf7f86SDimitry Andric Register Dst = MI->getOperand(0).getReg();
171fcaf7f86SDimitry Andric Register Src = MI->getOperand(1).getReg();
172fcaf7f86SDimitry Andric
173fcaf7f86SDimitry Andric if (!MI->isFullCopy()) {
174fcaf7f86SDimitry Andric // If src RC contains super registers of dst RC, it can also be coalesced.
175fcaf7f86SDimitry Andric if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
176fcaf7f86SDimitry Andric return false;
177fcaf7f86SDimitry Andric
178fcaf7f86SDimitry Andric auto SrcSub = MI->getOperand(1).getSubReg();
179fcaf7f86SDimitry Andric auto SrcRC = MRI->getRegClass(Src);
180fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst);
181fcaf7f86SDimitry Andric return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
182fcaf7f86SDimitry Andric }
183fcaf7f86SDimitry Andric
184fcaf7f86SDimitry Andric if (Src.isPhysical() && Dst.isPhysical())
185fcaf7f86SDimitry Andric return Src == Dst;
186fcaf7f86SDimitry Andric
187fcaf7f86SDimitry Andric if (Src.isVirtual() && Dst.isVirtual()) {
188fcaf7f86SDimitry Andric auto SrcRC = MRI->getRegClass(Src);
189fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst);
190fcaf7f86SDimitry Andric return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
191fcaf7f86SDimitry Andric }
192fcaf7f86SDimitry Andric
193fcaf7f86SDimitry Andric if (Src.isVirtual())
194fcaf7f86SDimitry Andric std::swap(Src, Dst);
195fcaf7f86SDimitry Andric
196fcaf7f86SDimitry Andric // Now Src is physical register, Dst is virtual register.
197fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst);
198fcaf7f86SDimitry Andric return DstRC->contains(Src);
199fcaf7f86SDimitry Andric }
200fcaf7f86SDimitry Andric
2010b57cec5SDimitry Andric /// Computes depth of instructions in vector \InsInstr.
2020b57cec5SDimitry Andric ///
2030b57cec5SDimitry Andric /// \param InsInstrs is a vector of machine instructions
2040b57cec5SDimitry Andric /// \param InstrIdxForVirtReg is a dense map of virtual register to index
2050b57cec5SDimitry Andric /// of defining machine instruction in \p InsInstrs
2060b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
2070b57cec5SDimitry Andric ///
2080b57cec5SDimitry Andric /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
2090b57cec5SDimitry Andric unsigned
getDepth(SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineTraceMetrics::Trace BlockTrace,const MachineBasicBlock & MBB)2100b57cec5SDimitry Andric MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
2110b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
212*fe013be4SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
213*fe013be4SDimitry Andric const MachineBasicBlock &MBB) {
2140b57cec5SDimitry Andric SmallVector<unsigned, 16> InstrDepth;
2150b57cec5SDimitry Andric // For each instruction in the new sequence compute the depth based on the
2160b57cec5SDimitry Andric // operands. Use the trace information when possible. For new operands which
2170b57cec5SDimitry Andric // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
2180b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) { // for each Use
2190b57cec5SDimitry Andric unsigned IDepth = 0;
220*fe013be4SDimitry Andric for (const MachineOperand &MO : InstrPtr->all_uses()) {
2210b57cec5SDimitry Andric // Check for virtual register operand.
222*fe013be4SDimitry Andric if (!MO.getReg().isVirtual())
2230b57cec5SDimitry Andric continue;
2240b57cec5SDimitry Andric unsigned DepthOp = 0;
2250b57cec5SDimitry Andric unsigned LatencyOp = 0;
2260b57cec5SDimitry Andric DenseMap<unsigned, unsigned>::iterator II =
2270b57cec5SDimitry Andric InstrIdxForVirtReg.find(MO.getReg());
2280b57cec5SDimitry Andric if (II != InstrIdxForVirtReg.end()) {
2290b57cec5SDimitry Andric // Operand is new virtual register not in trace
2300b57cec5SDimitry Andric assert(II->second < InstrDepth.size() && "Bad Index");
2310b57cec5SDimitry Andric MachineInstr *DefInstr = InsInstrs[II->second];
2320b57cec5SDimitry Andric assert(DefInstr &&
2330b57cec5SDimitry Andric "There must be a definition for a new virtual register");
2340b57cec5SDimitry Andric DepthOp = InstrDepth[II->second];
2350b57cec5SDimitry Andric int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
2360b57cec5SDimitry Andric int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
2370b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
2380b57cec5SDimitry Andric InstrPtr, UseIdx);
2390b57cec5SDimitry Andric } else {
2400b57cec5SDimitry Andric MachineInstr *DefInstr = getOperandDef(MO);
241*fe013be4SDimitry Andric if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
242*fe013be4SDimitry Andric MachineTraceStrategy::TS_Local ||
243*fe013be4SDimitry Andric DefInstr->getParent() == &MBB)) {
2440b57cec5SDimitry Andric DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
245fcaf7f86SDimitry Andric if (!isTransientMI(DefInstr))
2460b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(
2470b57cec5SDimitry Andric DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
2480b57cec5SDimitry Andric InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric IDepth = std::max(IDepth, DepthOp + LatencyOp);
2520b57cec5SDimitry Andric }
2530b57cec5SDimitry Andric InstrDepth.push_back(IDepth);
2540b57cec5SDimitry Andric }
2550b57cec5SDimitry Andric unsigned NewRootIdx = InsInstrs.size() - 1;
2560b57cec5SDimitry Andric return InstrDepth[NewRootIdx];
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric /// Computes instruction latency as max of latency of defined operands.
2600b57cec5SDimitry Andric ///
2610b57cec5SDimitry Andric /// \param Root is a machine instruction that could be replaced by NewRoot.
2620b57cec5SDimitry Andric /// It is used to compute a more accurate latency information for NewRoot in
2630b57cec5SDimitry Andric /// case there is a dependent instruction in the same trace (\p BlockTrace)
2640b57cec5SDimitry Andric /// \param NewRoot is the instruction for which the latency is computed
2650b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
2660b57cec5SDimitry Andric ///
2670b57cec5SDimitry Andric /// \returns Latency of \p NewRoot
getLatency(MachineInstr * Root,MachineInstr * NewRoot,MachineTraceMetrics::Trace BlockTrace)2680b57cec5SDimitry Andric unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
2690b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) {
2700b57cec5SDimitry Andric // Check each definition in NewRoot and compute the latency
2710b57cec5SDimitry Andric unsigned NewRootLatency = 0;
2720b57cec5SDimitry Andric
273*fe013be4SDimitry Andric for (const MachineOperand &MO : NewRoot->all_defs()) {
2740b57cec5SDimitry Andric // Check for virtual register operand.
275*fe013be4SDimitry Andric if (!MO.getReg().isVirtual())
2760b57cec5SDimitry Andric continue;
2770b57cec5SDimitry Andric // Get the first instruction that uses MO
2780b57cec5SDimitry Andric MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
2790b57cec5SDimitry Andric RI++;
2800b57cec5SDimitry Andric if (RI == MRI->reg_end())
2810b57cec5SDimitry Andric continue;
2820b57cec5SDimitry Andric MachineInstr *UseMO = RI->getParent();
2830b57cec5SDimitry Andric unsigned LatencyOp = 0;
2840b57cec5SDimitry Andric if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
2850b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(
2860b57cec5SDimitry Andric NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
2870b57cec5SDimitry Andric UseMO->findRegisterUseOperandIdx(MO.getReg()));
2880b57cec5SDimitry Andric } else {
2890b57cec5SDimitry Andric LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
2900b57cec5SDimitry Andric }
2910b57cec5SDimitry Andric NewRootLatency = std::max(NewRootLatency, LatencyOp);
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric return NewRootLatency;
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric /// The combiner's goal may differ based on which pattern it is attempting
2970b57cec5SDimitry Andric /// to optimize.
2980b57cec5SDimitry Andric enum class CombinerObjective {
2990b57cec5SDimitry Andric MustReduceDepth, // The data dependency chain must be improved.
300e8d8bef9SDimitry Andric MustReduceRegisterPressure, // The register pressure must be reduced.
3010b57cec5SDimitry Andric Default // The critical path must not be lengthened.
3020b57cec5SDimitry Andric };
3030b57cec5SDimitry Andric
getCombinerObjective(MachineCombinerPattern P)3040b57cec5SDimitry Andric static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
3050b57cec5SDimitry Andric // TODO: If C++ ever gets a real enum class, make this part of the
3060b57cec5SDimitry Andric // MachineCombinerPattern class.
3070b57cec5SDimitry Andric switch (P) {
3080b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_BY:
3090b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_YB:
3100b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_BY:
3110b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_YB:
3125ffd83dbSDimitry Andric case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
3135ffd83dbSDimitry Andric case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
31481ad6265SDimitry Andric case MachineCombinerPattern::SUBADD_OP1:
31581ad6265SDimitry Andric case MachineCombinerPattern::SUBADD_OP2:
316bdd1243dSDimitry Andric case MachineCombinerPattern::FMADD_AX:
317bdd1243dSDimitry Andric case MachineCombinerPattern::FMADD_XA:
318bdd1243dSDimitry Andric case MachineCombinerPattern::FMSUB:
319bdd1243dSDimitry Andric case MachineCombinerPattern::FNMSUB:
3200b57cec5SDimitry Andric return CombinerObjective::MustReduceDepth;
321e8d8bef9SDimitry Andric case MachineCombinerPattern::REASSOC_XY_BCA:
322e8d8bef9SDimitry Andric case MachineCombinerPattern::REASSOC_XY_BAC:
323e8d8bef9SDimitry Andric return CombinerObjective::MustReduceRegisterPressure;
3240b57cec5SDimitry Andric default:
3250b57cec5SDimitry Andric return CombinerObjective::Default;
3260b57cec5SDimitry Andric }
3270b57cec5SDimitry Andric }
3280b57cec5SDimitry Andric
3290b57cec5SDimitry Andric /// Estimate the latency of the new and original instruction sequence by summing
3300b57cec5SDimitry Andric /// up the latencies of the inserted and deleted instructions. This assumes
3310b57cec5SDimitry Andric /// that the inserted and deleted instructions are dependent instruction chains,
3320b57cec5SDimitry Andric /// which might not hold in all cases.
getLatenciesForInstrSequences(MachineInstr & MI,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineTraceMetrics::Trace BlockTrace)3330b57cec5SDimitry Andric std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
3340b57cec5SDimitry Andric MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
3350b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
3360b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) {
3370b57cec5SDimitry Andric assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
3380b57cec5SDimitry Andric unsigned NewRootLatency = 0;
3390b57cec5SDimitry Andric // NewRoot is the last instruction in the \p InsInstrs vector.
3400b57cec5SDimitry Andric MachineInstr *NewRoot = InsInstrs.back();
3410b57cec5SDimitry Andric for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
3420b57cec5SDimitry Andric NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
3430b57cec5SDimitry Andric NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
3440b57cec5SDimitry Andric
3450b57cec5SDimitry Andric unsigned RootLatency = 0;
346fcaf7f86SDimitry Andric for (auto *I : DelInstrs)
3470b57cec5SDimitry Andric RootLatency += TSchedModel.computeInstrLatency(I);
3480b57cec5SDimitry Andric
3490b57cec5SDimitry Andric return {NewRootLatency, RootLatency};
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric
reduceRegisterPressure(MachineInstr & Root,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineCombinerPattern Pattern)352e8d8bef9SDimitry Andric bool MachineCombiner::reduceRegisterPressure(
353e8d8bef9SDimitry Andric MachineInstr &Root, MachineBasicBlock *MBB,
354e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
355e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
356e8d8bef9SDimitry Andric MachineCombinerPattern Pattern) {
357e8d8bef9SDimitry Andric // FIXME: for now, we don't do any check for the register pressure patterns.
358e8d8bef9SDimitry Andric // We treat them as always profitable. But we can do better if we make
359e8d8bef9SDimitry Andric // RegPressureTracker class be aware of TIE attribute. Then we can get an
360e8d8bef9SDimitry Andric // accurate compare of register pressure with DelInstrs or InsInstrs.
361e8d8bef9SDimitry Andric return true;
362e8d8bef9SDimitry Andric }
363e8d8bef9SDimitry Andric
3640b57cec5SDimitry Andric /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
3650b57cec5SDimitry Andric /// The new code sequence ends in MI NewRoot. A necessary condition for the new
3660b57cec5SDimitry Andric /// sequence to replace the old sequence is that it cannot lengthen the critical
3670b57cec5SDimitry Andric /// path. The definition of "improve" may be restricted by specifying that the
3680b57cec5SDimitry Andric /// new path improves the data dependency chain (MustReduceDepth).
improvesCriticalPathLen(MachineBasicBlock * MBB,MachineInstr * Root,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineCombinerPattern Pattern,bool SlackIsAccurate)3690b57cec5SDimitry Andric bool MachineCombiner::improvesCriticalPathLen(
3700b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineInstr *Root,
3710b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
3720b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
3730b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
3740b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
3750b57cec5SDimitry Andric MachineCombinerPattern Pattern,
3760b57cec5SDimitry Andric bool SlackIsAccurate) {
3770b57cec5SDimitry Andric // Get depth and latency of NewRoot and Root.
378*fe013be4SDimitry Andric unsigned NewRootDepth =
379*fe013be4SDimitry Andric getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
3800b57cec5SDimitry Andric unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
3830b57cec5SDimitry Andric << NewRootDepth << "\tRootDepth: " << RootDepth);
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andric // For a transform such as reassociation, the cost equation is
3860b57cec5SDimitry Andric // conservatively calculated so that we must improve the depth (data
3870b57cec5SDimitry Andric // dependency cycles) in the critical path to proceed with the transform.
3880b57cec5SDimitry Andric // Being conservative also protects against inaccuracies in the underlying
3890b57cec5SDimitry Andric // machine trace metrics and CPU models.
3900b57cec5SDimitry Andric if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
3910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
3920b57cec5SDimitry Andric LLVM_DEBUG(NewRootDepth < RootDepth
3930b57cec5SDimitry Andric ? dbgs() << "\t and it does it\n"
3940b57cec5SDimitry Andric : dbgs() << "\t but it does NOT do it\n");
3950b57cec5SDimitry Andric return NewRootDepth < RootDepth;
3960b57cec5SDimitry Andric }
3970b57cec5SDimitry Andric
3980b57cec5SDimitry Andric // A more flexible cost calculation for the critical path includes the slack
3990b57cec5SDimitry Andric // of the original code sequence. This may allow the transform to proceed
4000b57cec5SDimitry Andric // even if the instruction depths (data dependency cycles) become worse.
4010b57cec5SDimitry Andric
4020b57cec5SDimitry Andric // Account for the latency of the inserted and deleted instructions by
4030b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency;
404*fe013be4SDimitry Andric if (TII->accumulateInstrSeqToRootLatency(*Root)) {
4050b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) =
4060b57cec5SDimitry Andric getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
407*fe013be4SDimitry Andric } else {
408*fe013be4SDimitry Andric NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
409*fe013be4SDimitry Andric RootLatency = TSchedModel.computeInstrLatency(Root);
410*fe013be4SDimitry Andric }
4110b57cec5SDimitry Andric
4120b57cec5SDimitry Andric unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
4130b57cec5SDimitry Andric unsigned NewCycleCount = NewRootDepth + NewRootLatency;
4140b57cec5SDimitry Andric unsigned OldCycleCount =
4150b57cec5SDimitry Andric RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
4160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
4170b57cec5SDimitry Andric << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
4180b57cec5SDimitry Andric << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
4190b57cec5SDimitry Andric << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
4200b57cec5SDimitry Andric << "\n\tRootDepth + RootLatency + RootSlack = "
4210b57cec5SDimitry Andric << OldCycleCount;);
4220b57cec5SDimitry Andric LLVM_DEBUG(NewCycleCount <= OldCycleCount
4230b57cec5SDimitry Andric ? dbgs() << "\n\t It IMPROVES PathLen because"
4240b57cec5SDimitry Andric : dbgs() << "\n\t It DOES NOT improve PathLen because");
4250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
4260b57cec5SDimitry Andric << ", OldCycleCount = " << OldCycleCount << "\n");
4270b57cec5SDimitry Andric
4280b57cec5SDimitry Andric return NewCycleCount <= OldCycleCount;
4290b57cec5SDimitry Andric }
4300b57cec5SDimitry Andric
4310b57cec5SDimitry Andric /// helper routine to convert instructions into SC
instr2instrSC(SmallVectorImpl<MachineInstr * > & Instrs,SmallVectorImpl<const MCSchedClassDesc * > & InstrsSC)4320b57cec5SDimitry Andric void MachineCombiner::instr2instrSC(
4330b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &Instrs,
4340b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
4350b57cec5SDimitry Andric for (auto *InstrPtr : Instrs) {
4360b57cec5SDimitry Andric unsigned Opc = InstrPtr->getOpcode();
4370b57cec5SDimitry Andric unsigned Idx = TII->get(Opc).getSchedClass();
4380b57cec5SDimitry Andric const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
4390b57cec5SDimitry Andric InstrsSC.push_back(SC);
4400b57cec5SDimitry Andric }
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric
4430b57cec5SDimitry Andric /// True when the new instructions do not increase resource length
preservesResourceLen(MachineBasicBlock * MBB,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs)4440b57cec5SDimitry Andric bool MachineCombiner::preservesResourceLen(
4450b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
4460b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
4470b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs) {
4480b57cec5SDimitry Andric if (!TSchedModel.hasInstrSchedModel())
4490b57cec5SDimitry Andric return true;
4500b57cec5SDimitry Andric
4510b57cec5SDimitry Andric // Compute current resource length
4520b57cec5SDimitry Andric
4530b57cec5SDimitry Andric //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
4540b57cec5SDimitry Andric SmallVector <const MachineBasicBlock *, 1> MBBarr;
4550b57cec5SDimitry Andric MBBarr.push_back(MBB);
4560b57cec5SDimitry Andric unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
4570b57cec5SDimitry Andric
4580b57cec5SDimitry Andric // Deal with SC rather than Instructions.
4590b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
4600b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
4610b57cec5SDimitry Andric
4620b57cec5SDimitry Andric instr2instrSC(InsInstrs, InsInstrsSC);
4630b57cec5SDimitry Andric instr2instrSC(DelInstrs, DelInstrsSC);
4640b57cec5SDimitry Andric
465bdd1243dSDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
466bdd1243dSDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
4670b57cec5SDimitry Andric
4680b57cec5SDimitry Andric // Compute new resource length.
4690b57cec5SDimitry Andric unsigned ResLenAfterCombine =
4700b57cec5SDimitry Andric BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
4710b57cec5SDimitry Andric
4720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
4730b57cec5SDimitry Andric << ResLenBeforeCombine
4740b57cec5SDimitry Andric << " and after: " << ResLenAfterCombine << "\n";);
4750b57cec5SDimitry Andric LLVM_DEBUG(
4765ffd83dbSDimitry Andric ResLenAfterCombine <=
4775ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit()
4780b57cec5SDimitry Andric ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
4790b57cec5SDimitry Andric : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
4800b57cec5SDimitry Andric "Length\n");
4810b57cec5SDimitry Andric
4825ffd83dbSDimitry Andric return ResLenAfterCombine <=
4835ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit();
4840b57cec5SDimitry Andric }
4850b57cec5SDimitry Andric
4860b57cec5SDimitry Andric /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
4870b57cec5SDimitry Andric /// depths if requested.
4880b57cec5SDimitry Andric ///
4890b57cec5SDimitry Andric /// \param MBB basic block to insert instructions in
4900b57cec5SDimitry Andric /// \param MI current machine instruction
4910b57cec5SDimitry Andric /// \param InsInstrs new instructions to insert in \p MBB
4920b57cec5SDimitry Andric /// \param DelInstrs instruction to delete from \p MBB
493*fe013be4SDimitry Andric /// \param TraceEnsemble is a pointer to the machine trace information
4940b57cec5SDimitry Andric /// \param RegUnits set of live registers, needed to compute instruction depths
495e8d8bef9SDimitry Andric /// \param TII is target instruction info, used to call target hook
496e8d8bef9SDimitry Andric /// \param Pattern is used to call target hook finalizeInsInstrs
4970b57cec5SDimitry Andric /// \param IncrementalUpdate if true, compute instruction depths incrementally,
4980b57cec5SDimitry Andric /// otherwise invalidate the trace
insertDeleteInstructions(MachineBasicBlock * MBB,MachineInstr & MI,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineTraceMetrics::Ensemble * TraceEnsemble,SparseSet<LiveRegUnit> & RegUnits,const TargetInstrInfo * TII,MachineCombinerPattern Pattern,bool IncrementalUpdate)499*fe013be4SDimitry Andric static void insertDeleteInstructions(
500*fe013be4SDimitry Andric MachineBasicBlock *MBB, MachineInstr &MI,
501*fe013be4SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
502*fe013be4SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
503*fe013be4SDimitry Andric MachineTraceMetrics::Ensemble *TraceEnsemble,
504*fe013be4SDimitry Andric SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
505*fe013be4SDimitry Andric MachineCombinerPattern Pattern, bool IncrementalUpdate) {
506e8d8bef9SDimitry Andric // If we want to fix up some placeholder for some target, do it now.
507e8d8bef9SDimitry Andric // We need this because in genAlternativeCodeSequence, we have not decided the
508e8d8bef9SDimitry Andric // better pattern InsInstrs or DelInstrs, so we don't want generate some
509e8d8bef9SDimitry Andric // sideeffect to the function. For example we need to delay the constant pool
510e8d8bef9SDimitry Andric // entry creation here after InsInstrs is selected as better pattern.
511e8d8bef9SDimitry Andric // Otherwise the constant pool entry created for InsInstrs will not be deleted
512e8d8bef9SDimitry Andric // even if InsInstrs is not the better pattern.
513e8d8bef9SDimitry Andric TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
514e8d8bef9SDimitry Andric
5150b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
5160b57cec5SDimitry Andric MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
5170b57cec5SDimitry Andric
5180b57cec5SDimitry Andric for (auto *InstrPtr : DelInstrs) {
5190eae32dcSDimitry Andric InstrPtr->eraseFromParent();
5200b57cec5SDimitry Andric // Erase all LiveRegs defined by the removed instruction
521fcaf7f86SDimitry Andric for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
5220b57cec5SDimitry Andric if (I->MI == InstrPtr)
5230b57cec5SDimitry Andric I = RegUnits.erase(I);
5240b57cec5SDimitry Andric else
5250b57cec5SDimitry Andric I++;
5260b57cec5SDimitry Andric }
5270b57cec5SDimitry Andric }
5280b57cec5SDimitry Andric
5290b57cec5SDimitry Andric if (IncrementalUpdate)
5300b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
531*fe013be4SDimitry Andric TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
5320b57cec5SDimitry Andric else
533*fe013be4SDimitry Andric TraceEnsemble->invalidate(MBB);
5340b57cec5SDimitry Andric
5350b57cec5SDimitry Andric NumInstCombined++;
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric
5380b57cec5SDimitry Andric // Check that the difference between original and new latency is decreasing for
5390b57cec5SDimitry Andric // later patterns. This helps to discover sub-optimal pattern orderings.
verifyPatternOrder(MachineBasicBlock * MBB,MachineInstr & Root,SmallVector<MachineCombinerPattern,16> & Patterns)5400b57cec5SDimitry Andric void MachineCombiner::verifyPatternOrder(
5410b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineInstr &Root,
5420b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> &Patterns) {
5430b57cec5SDimitry Andric long PrevLatencyDiff = std::numeric_limits<long>::max();
5440b57cec5SDimitry Andric (void)PrevLatencyDiff; // Variable is used in assert only.
5450b57cec5SDimitry Andric for (auto P : Patterns) {
5460b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs;
5470b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs;
5480b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
5490b57cec5SDimitry Andric TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
5500b57cec5SDimitry Andric InstrIdxForVirtReg);
5510b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence.
5520b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized
5530b57cec5SDimitry Andric // in a single instruction.
5540b57cec5SDimitry Andric if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
5550b57cec5SDimitry Andric continue;
5560b57cec5SDimitry Andric
5570b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency;
5580b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
559*fe013be4SDimitry Andric Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB));
5600b57cec5SDimitry Andric long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
5610b57cec5SDimitry Andric assert(CurrentLatencyDiff <= PrevLatencyDiff &&
5620b57cec5SDimitry Andric "Current pattern is better than previous pattern.");
5630b57cec5SDimitry Andric PrevLatencyDiff = CurrentLatencyDiff;
5640b57cec5SDimitry Andric }
5650b57cec5SDimitry Andric }
5660b57cec5SDimitry Andric
5670b57cec5SDimitry Andric /// Substitute a slow code sequence with a faster one by
5680b57cec5SDimitry Andric /// evaluating instruction combining pattern.
5690b57cec5SDimitry Andric /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
5700b57cec5SDimitry Andric /// combining based on machine trace metrics. Only combine a sequence of
5710b57cec5SDimitry Andric /// instructions when this neither lengthens the critical path nor increases
5720b57cec5SDimitry Andric /// resource pressure. When optimizing for codesize always combine when the new
5730b57cec5SDimitry Andric /// sequence is shorter.
combineInstructions(MachineBasicBlock * MBB)5740b57cec5SDimitry Andric bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
5750b57cec5SDimitry Andric bool Changed = false;
5760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
5770b57cec5SDimitry Andric
5780b57cec5SDimitry Andric bool IncrementalUpdate = false;
5790b57cec5SDimitry Andric auto BlockIter = MBB->begin();
5800b57cec5SDimitry Andric decltype(BlockIter) LastUpdate;
5810b57cec5SDimitry Andric // Check if the block is in a loop.
5820b57cec5SDimitry Andric const MachineLoop *ML = MLI->getLoopFor(MBB);
583*fe013be4SDimitry Andric if (!TraceEnsemble)
584*fe013be4SDimitry Andric TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
5850b57cec5SDimitry Andric
5860b57cec5SDimitry Andric SparseSet<LiveRegUnit> RegUnits;
5870b57cec5SDimitry Andric RegUnits.setUniverse(TRI->getNumRegUnits());
5880b57cec5SDimitry Andric
589480093f4SDimitry Andric bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
590480093f4SDimitry Andric
591e8d8bef9SDimitry Andric bool DoRegPressureReduce =
592e8d8bef9SDimitry Andric TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
593e8d8bef9SDimitry Andric
5940b57cec5SDimitry Andric while (BlockIter != MBB->end()) {
5950b57cec5SDimitry Andric auto &MI = *BlockIter++;
5960b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> Patterns;
5970b57cec5SDimitry Andric // The motivating example is:
5980b57cec5SDimitry Andric //
5990b57cec5SDimitry Andric // MUL Other MUL_op1 MUL_op2 Other
6000b57cec5SDimitry Andric // \ / \ | /
6010b57cec5SDimitry Andric // ADD/SUB => MADD/MSUB
6020b57cec5SDimitry Andric // (=Root) (=NewRoot)
6030b57cec5SDimitry Andric
6040b57cec5SDimitry Andric // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
6050b57cec5SDimitry Andric // usually beneficial for code size it unfortunately can hurt performance
6060b57cec5SDimitry Andric // when the ADD is on the critical path, but the MUL is not. With the
6070b57cec5SDimitry Andric // substitution the MUL becomes part of the critical path (in form of the
6080b57cec5SDimitry Andric // MADD) and can lengthen it on architectures where the MADD latency is
6090b57cec5SDimitry Andric // longer than the ADD latency.
6100b57cec5SDimitry Andric //
6110b57cec5SDimitry Andric // For each instruction we check if it can be the root of a combiner
6120b57cec5SDimitry Andric // pattern. Then for each pattern the new code sequence in form of MI is
6130b57cec5SDimitry Andric // generated and evaluated. When the efficiency criteria (don't lengthen
6140b57cec5SDimitry Andric // critical path, don't use more resources) is met the new sequence gets
6150b57cec5SDimitry Andric // hooked up into the basic block before the old sequence is removed.
6160b57cec5SDimitry Andric //
6170b57cec5SDimitry Andric // The algorithm does not try to evaluate all patterns and pick the best.
6180b57cec5SDimitry Andric // This is only an artificial restriction though. In practice there is
6190b57cec5SDimitry Andric // mostly one pattern, and getMachineCombinerPatterns() can order patterns
6200b57cec5SDimitry Andric // based on an internal cost heuristic. If
6210b57cec5SDimitry Andric // machine-combiner-verify-pattern-order is enabled, all patterns are
6220b57cec5SDimitry Andric // checked to ensure later patterns do not provide better latency savings.
6230b57cec5SDimitry Andric
624e8d8bef9SDimitry Andric if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
6250b57cec5SDimitry Andric continue;
6260b57cec5SDimitry Andric
6270b57cec5SDimitry Andric if (VerifyPatternOrder)
6280b57cec5SDimitry Andric verifyPatternOrder(MBB, MI, Patterns);
6290b57cec5SDimitry Andric
630bdd1243dSDimitry Andric for (const auto P : Patterns) {
6310b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs;
6320b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs;
6330b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
6340b57cec5SDimitry Andric TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
6350b57cec5SDimitry Andric InstrIdxForVirtReg);
6360b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence.
6370b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized
6380b57cec5SDimitry Andric // in a single instruction.
639bdd1243dSDimitry Andric if (InsInstrs.empty())
6400b57cec5SDimitry Andric continue;
6410b57cec5SDimitry Andric
6420b57cec5SDimitry Andric LLVM_DEBUG(if (dump_intrs) {
6430b57cec5SDimitry Andric dbgs() << "\tFor the Pattern (" << (int)P
6440b57cec5SDimitry Andric << ") these instructions could be removed\n";
6450b57cec5SDimitry Andric for (auto const *InstrPtr : DelInstrs)
6460b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6470b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6480b57cec5SDimitry Andric dbgs() << "\tThese instructions could replace the removed ones\n";
6490b57cec5SDimitry Andric for (auto const *InstrPtr : InsInstrs)
6500b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6510b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6520b57cec5SDimitry Andric });
6530b57cec5SDimitry Andric
654e8d8bef9SDimitry Andric if (IncrementalUpdate && LastUpdate != BlockIter) {
6550b57cec5SDimitry Andric // Update depths since the last incremental update.
656*fe013be4SDimitry Andric TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
6570b57cec5SDimitry Andric LastUpdate = BlockIter;
6580b57cec5SDimitry Andric }
6590b57cec5SDimitry Andric
660e8d8bef9SDimitry Andric if (DoRegPressureReduce &&
661e8d8bef9SDimitry Andric getCombinerObjective(P) ==
662e8d8bef9SDimitry Andric CombinerObjective::MustReduceRegisterPressure) {
663e8d8bef9SDimitry Andric if (MBB->size() > inc_threshold) {
664e8d8bef9SDimitry Andric // Use incremental depth updates for basic blocks above threshold
665e8d8bef9SDimitry Andric IncrementalUpdate = true;
666e8d8bef9SDimitry Andric LastUpdate = BlockIter;
667e8d8bef9SDimitry Andric }
668e8d8bef9SDimitry Andric if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
669e8d8bef9SDimitry Andric // Replace DelInstrs with InsInstrs.
670*fe013be4SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
671e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
672e8d8bef9SDimitry Andric Changed |= true;
673e8d8bef9SDimitry Andric
674e8d8bef9SDimitry Andric // Go back to previous instruction as it may have ILP reassociation
675e8d8bef9SDimitry Andric // opportunity.
676e8d8bef9SDimitry Andric BlockIter--;
677e8d8bef9SDimitry Andric break;
678e8d8bef9SDimitry Andric }
679e8d8bef9SDimitry Andric }
680e8d8bef9SDimitry Andric
681bdd1243dSDimitry Andric if (ML && TII->isThroughputPattern(P)) {
682bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
683*fe013be4SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
684bdd1243dSDimitry Andric RegUnits, TII, P, IncrementalUpdate);
685bdd1243dSDimitry Andric // Eagerly stop after the first pattern fires.
686bdd1243dSDimitry Andric Changed = true;
687bdd1243dSDimitry Andric break;
688bdd1243dSDimitry Andric } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
689bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
690bdd1243dSDimitry Andric << InsInstrs.size() << " < "
691bdd1243dSDimitry Andric << DelInstrs.size() << ")\n");
692*fe013be4SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
693e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
6940b57cec5SDimitry Andric // Eagerly stop after the first pattern fires.
6950b57cec5SDimitry Andric Changed = true;
6960b57cec5SDimitry Andric break;
6970b57cec5SDimitry Andric } else {
6980b57cec5SDimitry Andric // For big basic blocks, we only compute the full trace the first time
6990b57cec5SDimitry Andric // we hit this. We do not invalidate the trace, but instead update the
7000b57cec5SDimitry Andric // instruction depths incrementally.
7010b57cec5SDimitry Andric // NOTE: Only the instruction depths up to MI are accurate. All other
7020b57cec5SDimitry Andric // trace information is not updated.
703*fe013be4SDimitry Andric MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
7040b57cec5SDimitry Andric Traces->verifyAnalysis();
7050b57cec5SDimitry Andric if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
7060b57cec5SDimitry Andric InstrIdxForVirtReg, P,
7070b57cec5SDimitry Andric !IncrementalUpdate) &&
7080b57cec5SDimitry Andric preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
7090b57cec5SDimitry Andric if (MBB->size() > inc_threshold) {
7100b57cec5SDimitry Andric // Use incremental depth updates for basic blocks above treshold
7110b57cec5SDimitry Andric IncrementalUpdate = true;
7120b57cec5SDimitry Andric LastUpdate = BlockIter;
7130b57cec5SDimitry Andric }
7140b57cec5SDimitry Andric
715*fe013be4SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
716e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
7170b57cec5SDimitry Andric
7180b57cec5SDimitry Andric // Eagerly stop after the first pattern fires.
7190b57cec5SDimitry Andric Changed = true;
7200b57cec5SDimitry Andric break;
7210b57cec5SDimitry Andric }
7220b57cec5SDimitry Andric // Cleanup instructions of the alternative code sequence. There is no
7230b57cec5SDimitry Andric // use for them.
7240b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent();
7250b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
7260eae32dcSDimitry Andric MF->deleteMachineInstr(InstrPtr);
7270b57cec5SDimitry Andric }
7280b57cec5SDimitry Andric InstrIdxForVirtReg.clear();
7290b57cec5SDimitry Andric }
7300b57cec5SDimitry Andric }
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric if (Changed && IncrementalUpdate)
7330b57cec5SDimitry Andric Traces->invalidate(MBB);
7340b57cec5SDimitry Andric return Changed;
7350b57cec5SDimitry Andric }
7360b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)7370b57cec5SDimitry Andric bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
7380b57cec5SDimitry Andric STI = &MF.getSubtarget();
7390b57cec5SDimitry Andric TII = STI->getInstrInfo();
7400b57cec5SDimitry Andric TRI = STI->getRegisterInfo();
7410b57cec5SDimitry Andric SchedModel = STI->getSchedModel();
7420b57cec5SDimitry Andric TSchedModel.init(STI);
7430b57cec5SDimitry Andric MRI = &MF.getRegInfo();
7440b57cec5SDimitry Andric MLI = &getAnalysis<MachineLoopInfo>();
7450b57cec5SDimitry Andric Traces = &getAnalysis<MachineTraceMetrics>();
746480093f4SDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
747480093f4SDimitry Andric MBFI = (PSI && PSI->hasProfileSummary()) ?
748480093f4SDimitry Andric &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
749480093f4SDimitry Andric nullptr;
750*fe013be4SDimitry Andric TraceEnsemble = nullptr;
7510b57cec5SDimitry Andric OptSize = MF.getFunction().hasOptSize();
752e8d8bef9SDimitry Andric RegClassInfo.runOnMachineFunction(MF);
7530b57cec5SDimitry Andric
7540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
7550b57cec5SDimitry Andric if (!TII->useMachineCombiner()) {
7560b57cec5SDimitry Andric LLVM_DEBUG(
7570b57cec5SDimitry Andric dbgs()
7580b57cec5SDimitry Andric << " Skipping pass: Target does not support machine combiner\n");
7590b57cec5SDimitry Andric return false;
7600b57cec5SDimitry Andric }
7610b57cec5SDimitry Andric
7620b57cec5SDimitry Andric bool Changed = false;
7630b57cec5SDimitry Andric
7640b57cec5SDimitry Andric // Try to combine instructions.
7650b57cec5SDimitry Andric for (auto &MBB : MF)
7660b57cec5SDimitry Andric Changed |= combineInstructions(&MBB);
7670b57cec5SDimitry Andric
7680b57cec5SDimitry Andric return Changed;
7690b57cec5SDimitry Andric }
770