10b57cec5SDimitry Andric //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // The machine combiner pass uses machine trace metrics to ensure the combined
100b57cec5SDimitry Andric // instructions do not lengthen the critical path or the resource depth.
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
140b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
15480093f4SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
16480093f4SDimitry Andric #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
22480093f4SDimitry Andric #include "llvm/CodeGen/MachineSizeOpts.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
25*af732203SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
30480093f4SDimitry Andric #include "llvm/InitializePasses.h"
310b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
320b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
330b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #define DEBUG_TYPE "machine-combiner"
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric STATISTIC(NumInstCombined, "Number of machineinst combined");
400b57cec5SDimitry Andric
410b57cec5SDimitry Andric static cl::opt<unsigned>
420b57cec5SDimitry Andric inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
430b57cec5SDimitry Andric cl::desc("Incremental depth computation will be used for basic "
440b57cec5SDimitry Andric "blocks with more instructions."), cl::init(500));
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
470b57cec5SDimitry Andric cl::desc("Dump all substituted intrs"),
480b57cec5SDimitry Andric cl::init(false));
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
510b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
520b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden,
530b57cec5SDimitry Andric cl::desc(
540b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"),
550b57cec5SDimitry Andric cl::init(true));
560b57cec5SDimitry Andric #else
570b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
580b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden,
590b57cec5SDimitry Andric cl::desc(
600b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"),
610b57cec5SDimitry Andric cl::init(false));
620b57cec5SDimitry Andric #endif
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric namespace {
650b57cec5SDimitry Andric class MachineCombiner : public MachineFunctionPass {
660b57cec5SDimitry Andric const TargetSubtargetInfo *STI;
670b57cec5SDimitry Andric const TargetInstrInfo *TII;
680b57cec5SDimitry Andric const TargetRegisterInfo *TRI;
690b57cec5SDimitry Andric MCSchedModel SchedModel;
700b57cec5SDimitry Andric MachineRegisterInfo *MRI;
710b57cec5SDimitry Andric MachineLoopInfo *MLI; // Current MachineLoopInfo
720b57cec5SDimitry Andric MachineTraceMetrics *Traces;
730b57cec5SDimitry Andric MachineTraceMetrics::Ensemble *MinInstr;
74480093f4SDimitry Andric MachineBlockFrequencyInfo *MBFI;
75480093f4SDimitry Andric ProfileSummaryInfo *PSI;
76*af732203SDimitry Andric RegisterClassInfo RegClassInfo;
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric TargetSchedModel TSchedModel;
790b57cec5SDimitry Andric
800b57cec5SDimitry Andric /// True if optimizing for code size.
810b57cec5SDimitry Andric bool OptSize;
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric public:
840b57cec5SDimitry Andric static char ID;
MachineCombiner()850b57cec5SDimitry Andric MachineCombiner() : MachineFunctionPass(ID) {
860b57cec5SDimitry Andric initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
890b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const900b57cec5SDimitry Andric StringRef getPassName() const override { return "Machine InstCombiner"; }
910b57cec5SDimitry Andric
920b57cec5SDimitry Andric private:
93480093f4SDimitry Andric bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
940b57cec5SDimitry Andric bool combineInstructions(MachineBasicBlock *);
950b57cec5SDimitry Andric MachineInstr *getOperandDef(const MachineOperand &MO);
960b57cec5SDimitry Andric unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
970b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
980b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace);
990b57cec5SDimitry Andric unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
1000b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace);
1010b57cec5SDimitry Andric bool
1020b57cec5SDimitry Andric improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
1030b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
1040b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1050b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
1060b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
1070b57cec5SDimitry Andric MachineCombinerPattern Pattern, bool SlackIsAccurate);
108*af732203SDimitry Andric bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
109*af732203SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
110*af732203SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
111*af732203SDimitry Andric MachineCombinerPattern Pattern);
1120b57cec5SDimitry Andric bool preservesResourceLen(MachineBasicBlock *MBB,
1130b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
1140b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1150b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs);
1160b57cec5SDimitry Andric void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
1170b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
1180b57cec5SDimitry Andric std::pair<unsigned, unsigned>
1190b57cec5SDimitry Andric getLatenciesForInstrSequences(MachineInstr &MI,
1200b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
1210b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
1220b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace);
1230b57cec5SDimitry Andric
1240b57cec5SDimitry Andric void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
1250b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> &Patterns);
1260b57cec5SDimitry Andric };
1270b57cec5SDimitry Andric }
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric char MachineCombiner::ID = 0;
1300b57cec5SDimitry Andric char &llvm::MachineCombinerID = MachineCombiner::ID;
1310b57cec5SDimitry Andric
1320b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
1330b57cec5SDimitry Andric "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)1340b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1350b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
1360b57cec5SDimitry Andric INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
1370b57cec5SDimitry Andric false, false)
1380b57cec5SDimitry Andric
1390b57cec5SDimitry Andric void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
1400b57cec5SDimitry Andric AU.setPreservesCFG();
1410b57cec5SDimitry Andric AU.addPreserved<MachineDominatorTree>();
1420b57cec5SDimitry Andric AU.addRequired<MachineLoopInfo>();
1430b57cec5SDimitry Andric AU.addPreserved<MachineLoopInfo>();
1440b57cec5SDimitry Andric AU.addRequired<MachineTraceMetrics>();
1450b57cec5SDimitry Andric AU.addPreserved<MachineTraceMetrics>();
146480093f4SDimitry Andric AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
147480093f4SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
1480b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric
getOperandDef(const MachineOperand & MO)1510b57cec5SDimitry Andric MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
1520b57cec5SDimitry Andric MachineInstr *DefInstr = nullptr;
1530b57cec5SDimitry Andric // We need a virtual register definition.
1548bcb0991SDimitry Andric if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
1550b57cec5SDimitry Andric DefInstr = MRI->getUniqueVRegDef(MO.getReg());
1560b57cec5SDimitry Andric // PHI's have no depth etc.
1570b57cec5SDimitry Andric if (DefInstr && DefInstr->isPHI())
1580b57cec5SDimitry Andric DefInstr = nullptr;
1590b57cec5SDimitry Andric return DefInstr;
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric /// Computes depth of instructions in vector \InsInstr.
1630b57cec5SDimitry Andric ///
1640b57cec5SDimitry Andric /// \param InsInstrs is a vector of machine instructions
1650b57cec5SDimitry Andric /// \param InstrIdxForVirtReg is a dense map of virtual register to index
1660b57cec5SDimitry Andric /// of defining machine instruction in \p InsInstrs
1670b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
1680b57cec5SDimitry Andric ///
1690b57cec5SDimitry Andric /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
1700b57cec5SDimitry Andric unsigned
getDepth(SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineTraceMetrics::Trace BlockTrace)1710b57cec5SDimitry Andric MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
1720b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
1730b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) {
1740b57cec5SDimitry Andric SmallVector<unsigned, 16> InstrDepth;
1750b57cec5SDimitry Andric assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
1760b57cec5SDimitry Andric "Missing machine model\n");
1770b57cec5SDimitry Andric
1780b57cec5SDimitry Andric // For each instruction in the new sequence compute the depth based on the
1790b57cec5SDimitry Andric // operands. Use the trace information when possible. For new operands which
1800b57cec5SDimitry Andric // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
1810b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) { // for each Use
1820b57cec5SDimitry Andric unsigned IDepth = 0;
1830b57cec5SDimitry Andric for (const MachineOperand &MO : InstrPtr->operands()) {
1840b57cec5SDimitry Andric // Check for virtual register operand.
1858bcb0991SDimitry Andric if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
1860b57cec5SDimitry Andric continue;
1870b57cec5SDimitry Andric if (!MO.isUse())
1880b57cec5SDimitry Andric continue;
1890b57cec5SDimitry Andric unsigned DepthOp = 0;
1900b57cec5SDimitry Andric unsigned LatencyOp = 0;
1910b57cec5SDimitry Andric DenseMap<unsigned, unsigned>::iterator II =
1920b57cec5SDimitry Andric InstrIdxForVirtReg.find(MO.getReg());
1930b57cec5SDimitry Andric if (II != InstrIdxForVirtReg.end()) {
1940b57cec5SDimitry Andric // Operand is new virtual register not in trace
1950b57cec5SDimitry Andric assert(II->second < InstrDepth.size() && "Bad Index");
1960b57cec5SDimitry Andric MachineInstr *DefInstr = InsInstrs[II->second];
1970b57cec5SDimitry Andric assert(DefInstr &&
1980b57cec5SDimitry Andric "There must be a definition for a new virtual register");
1990b57cec5SDimitry Andric DepthOp = InstrDepth[II->second];
2000b57cec5SDimitry Andric int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
2010b57cec5SDimitry Andric int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
2020b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
2030b57cec5SDimitry Andric InstrPtr, UseIdx);
2040b57cec5SDimitry Andric } else {
2050b57cec5SDimitry Andric MachineInstr *DefInstr = getOperandDef(MO);
2060b57cec5SDimitry Andric if (DefInstr) {
2070b57cec5SDimitry Andric DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
2080b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(
2090b57cec5SDimitry Andric DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
2100b57cec5SDimitry Andric InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric }
2130b57cec5SDimitry Andric IDepth = std::max(IDepth, DepthOp + LatencyOp);
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric InstrDepth.push_back(IDepth);
2160b57cec5SDimitry Andric }
2170b57cec5SDimitry Andric unsigned NewRootIdx = InsInstrs.size() - 1;
2180b57cec5SDimitry Andric return InstrDepth[NewRootIdx];
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
2210b57cec5SDimitry Andric /// Computes instruction latency as max of latency of defined operands.
2220b57cec5SDimitry Andric ///
2230b57cec5SDimitry Andric /// \param Root is a machine instruction that could be replaced by NewRoot.
2240b57cec5SDimitry Andric /// It is used to compute a more accurate latency information for NewRoot in
2250b57cec5SDimitry Andric /// case there is a dependent instruction in the same trace (\p BlockTrace)
2260b57cec5SDimitry Andric /// \param NewRoot is the instruction for which the latency is computed
2270b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
2280b57cec5SDimitry Andric ///
2290b57cec5SDimitry Andric /// \returns Latency of \p NewRoot
getLatency(MachineInstr * Root,MachineInstr * NewRoot,MachineTraceMetrics::Trace BlockTrace)2300b57cec5SDimitry Andric unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
2310b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) {
2320b57cec5SDimitry Andric assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
2330b57cec5SDimitry Andric "Missing machine model\n");
2340b57cec5SDimitry Andric
2350b57cec5SDimitry Andric // Check each definition in NewRoot and compute the latency
2360b57cec5SDimitry Andric unsigned NewRootLatency = 0;
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric for (const MachineOperand &MO : NewRoot->operands()) {
2390b57cec5SDimitry Andric // Check for virtual register operand.
2408bcb0991SDimitry Andric if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
2410b57cec5SDimitry Andric continue;
2420b57cec5SDimitry Andric if (!MO.isDef())
2430b57cec5SDimitry Andric continue;
2440b57cec5SDimitry Andric // Get the first instruction that uses MO
2450b57cec5SDimitry Andric MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
2460b57cec5SDimitry Andric RI++;
2470b57cec5SDimitry Andric if (RI == MRI->reg_end())
2480b57cec5SDimitry Andric continue;
2490b57cec5SDimitry Andric MachineInstr *UseMO = RI->getParent();
2500b57cec5SDimitry Andric unsigned LatencyOp = 0;
2510b57cec5SDimitry Andric if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
2520b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(
2530b57cec5SDimitry Andric NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
2540b57cec5SDimitry Andric UseMO->findRegisterUseOperandIdx(MO.getReg()));
2550b57cec5SDimitry Andric } else {
2560b57cec5SDimitry Andric LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric NewRootLatency = std::max(NewRootLatency, LatencyOp);
2590b57cec5SDimitry Andric }
2600b57cec5SDimitry Andric return NewRootLatency;
2610b57cec5SDimitry Andric }
2620b57cec5SDimitry Andric
2630b57cec5SDimitry Andric /// The combiner's goal may differ based on which pattern it is attempting
2640b57cec5SDimitry Andric /// to optimize.
2650b57cec5SDimitry Andric enum class CombinerObjective {
2660b57cec5SDimitry Andric MustReduceDepth, // The data dependency chain must be improved.
267*af732203SDimitry Andric MustReduceRegisterPressure, // The register pressure must be reduced.
2680b57cec5SDimitry Andric Default // The critical path must not be lengthened.
2690b57cec5SDimitry Andric };
2700b57cec5SDimitry Andric
getCombinerObjective(MachineCombinerPattern P)2710b57cec5SDimitry Andric static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
2720b57cec5SDimitry Andric // TODO: If C++ ever gets a real enum class, make this part of the
2730b57cec5SDimitry Andric // MachineCombinerPattern class.
2740b57cec5SDimitry Andric switch (P) {
2750b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_BY:
2760b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_YB:
2770b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_BY:
2780b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_YB:
2795ffd83dbSDimitry Andric case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
2805ffd83dbSDimitry Andric case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
2810b57cec5SDimitry Andric return CombinerObjective::MustReduceDepth;
282*af732203SDimitry Andric case MachineCombinerPattern::REASSOC_XY_BCA:
283*af732203SDimitry Andric case MachineCombinerPattern::REASSOC_XY_BAC:
284*af732203SDimitry Andric return CombinerObjective::MustReduceRegisterPressure;
2850b57cec5SDimitry Andric default:
2860b57cec5SDimitry Andric return CombinerObjective::Default;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric
2900b57cec5SDimitry Andric /// Estimate the latency of the new and original instruction sequence by summing
2910b57cec5SDimitry Andric /// up the latencies of the inserted and deleted instructions. This assumes
2920b57cec5SDimitry Andric /// that the inserted and deleted instructions are dependent instruction chains,
2930b57cec5SDimitry Andric /// which might not hold in all cases.
getLatenciesForInstrSequences(MachineInstr & MI,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineTraceMetrics::Trace BlockTrace)2940b57cec5SDimitry Andric std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
2950b57cec5SDimitry Andric MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
2960b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
2970b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) {
2980b57cec5SDimitry Andric assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
2990b57cec5SDimitry Andric unsigned NewRootLatency = 0;
3000b57cec5SDimitry Andric // NewRoot is the last instruction in the \p InsInstrs vector.
3010b57cec5SDimitry Andric MachineInstr *NewRoot = InsInstrs.back();
3020b57cec5SDimitry Andric for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
3030b57cec5SDimitry Andric NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
3040b57cec5SDimitry Andric NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
3050b57cec5SDimitry Andric
3060b57cec5SDimitry Andric unsigned RootLatency = 0;
3070b57cec5SDimitry Andric for (auto I : DelInstrs)
3080b57cec5SDimitry Andric RootLatency += TSchedModel.computeInstrLatency(I);
3090b57cec5SDimitry Andric
3100b57cec5SDimitry Andric return {NewRootLatency, RootLatency};
3110b57cec5SDimitry Andric }
3120b57cec5SDimitry Andric
reduceRegisterPressure(MachineInstr & Root,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineCombinerPattern Pattern)313*af732203SDimitry Andric bool MachineCombiner::reduceRegisterPressure(
314*af732203SDimitry Andric MachineInstr &Root, MachineBasicBlock *MBB,
315*af732203SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
316*af732203SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
317*af732203SDimitry Andric MachineCombinerPattern Pattern) {
318*af732203SDimitry Andric // FIXME: for now, we don't do any check for the register pressure patterns.
319*af732203SDimitry Andric // We treat them as always profitable. But we can do better if we make
320*af732203SDimitry Andric // RegPressureTracker class be aware of TIE attribute. Then we can get an
321*af732203SDimitry Andric // accurate compare of register pressure with DelInstrs or InsInstrs.
322*af732203SDimitry Andric return true;
323*af732203SDimitry Andric }
324*af732203SDimitry Andric
3250b57cec5SDimitry Andric /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
3260b57cec5SDimitry Andric /// The new code sequence ends in MI NewRoot. A necessary condition for the new
3270b57cec5SDimitry Andric /// sequence to replace the old sequence is that it cannot lengthen the critical
3280b57cec5SDimitry Andric /// path. The definition of "improve" may be restricted by specifying that the
3290b57cec5SDimitry Andric /// new path improves the data dependency chain (MustReduceDepth).
improvesCriticalPathLen(MachineBasicBlock * MBB,MachineInstr * Root,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineCombinerPattern Pattern,bool SlackIsAccurate)3300b57cec5SDimitry Andric bool MachineCombiner::improvesCriticalPathLen(
3310b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineInstr *Root,
3320b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace,
3330b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
3340b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs,
3350b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
3360b57cec5SDimitry Andric MachineCombinerPattern Pattern,
3370b57cec5SDimitry Andric bool SlackIsAccurate) {
3380b57cec5SDimitry Andric assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
3390b57cec5SDimitry Andric "Missing machine model\n");
3400b57cec5SDimitry Andric // Get depth and latency of NewRoot and Root.
3410b57cec5SDimitry Andric unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
3420b57cec5SDimitry Andric unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
3430b57cec5SDimitry Andric
3440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
3450b57cec5SDimitry Andric << NewRootDepth << "\tRootDepth: " << RootDepth);
3460b57cec5SDimitry Andric
3470b57cec5SDimitry Andric // For a transform such as reassociation, the cost equation is
3480b57cec5SDimitry Andric // conservatively calculated so that we must improve the depth (data
3490b57cec5SDimitry Andric // dependency cycles) in the critical path to proceed with the transform.
3500b57cec5SDimitry Andric // Being conservative also protects against inaccuracies in the underlying
3510b57cec5SDimitry Andric // machine trace metrics and CPU models.
3520b57cec5SDimitry Andric if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
3530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
3540b57cec5SDimitry Andric LLVM_DEBUG(NewRootDepth < RootDepth
3550b57cec5SDimitry Andric ? dbgs() << "\t and it does it\n"
3560b57cec5SDimitry Andric : dbgs() << "\t but it does NOT do it\n");
3570b57cec5SDimitry Andric return NewRootDepth < RootDepth;
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric // A more flexible cost calculation for the critical path includes the slack
3610b57cec5SDimitry Andric // of the original code sequence. This may allow the transform to proceed
3620b57cec5SDimitry Andric // even if the instruction depths (data dependency cycles) become worse.
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric // Account for the latency of the inserted and deleted instructions by
3650b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency;
3660b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) =
3670b57cec5SDimitry Andric getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
3680b57cec5SDimitry Andric
3690b57cec5SDimitry Andric unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
3700b57cec5SDimitry Andric unsigned NewCycleCount = NewRootDepth + NewRootLatency;
3710b57cec5SDimitry Andric unsigned OldCycleCount =
3720b57cec5SDimitry Andric RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
3730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
3740b57cec5SDimitry Andric << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
3750b57cec5SDimitry Andric << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
3760b57cec5SDimitry Andric << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
3770b57cec5SDimitry Andric << "\n\tRootDepth + RootLatency + RootSlack = "
3780b57cec5SDimitry Andric << OldCycleCount;);
3790b57cec5SDimitry Andric LLVM_DEBUG(NewCycleCount <= OldCycleCount
3800b57cec5SDimitry Andric ? dbgs() << "\n\t It IMPROVES PathLen because"
3810b57cec5SDimitry Andric : dbgs() << "\n\t It DOES NOT improve PathLen because");
3820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
3830b57cec5SDimitry Andric << ", OldCycleCount = " << OldCycleCount << "\n");
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andric return NewCycleCount <= OldCycleCount;
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric
3880b57cec5SDimitry Andric /// helper routine to convert instructions into SC
instr2instrSC(SmallVectorImpl<MachineInstr * > & Instrs,SmallVectorImpl<const MCSchedClassDesc * > & InstrsSC)3890b57cec5SDimitry Andric void MachineCombiner::instr2instrSC(
3900b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &Instrs,
3910b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
3920b57cec5SDimitry Andric for (auto *InstrPtr : Instrs) {
3930b57cec5SDimitry Andric unsigned Opc = InstrPtr->getOpcode();
3940b57cec5SDimitry Andric unsigned Idx = TII->get(Opc).getSchedClass();
3950b57cec5SDimitry Andric const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
3960b57cec5SDimitry Andric InstrsSC.push_back(SC);
3970b57cec5SDimitry Andric }
3980b57cec5SDimitry Andric }
3990b57cec5SDimitry Andric
4000b57cec5SDimitry Andric /// True when the new instructions do not increase resource length
preservesResourceLen(MachineBasicBlock * MBB,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs)4010b57cec5SDimitry Andric bool MachineCombiner::preservesResourceLen(
4020b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
4030b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs,
4040b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs) {
4050b57cec5SDimitry Andric if (!TSchedModel.hasInstrSchedModel())
4060b57cec5SDimitry Andric return true;
4070b57cec5SDimitry Andric
4080b57cec5SDimitry Andric // Compute current resource length
4090b57cec5SDimitry Andric
4100b57cec5SDimitry Andric //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
4110b57cec5SDimitry Andric SmallVector <const MachineBasicBlock *, 1> MBBarr;
4120b57cec5SDimitry Andric MBBarr.push_back(MBB);
4130b57cec5SDimitry Andric unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
4140b57cec5SDimitry Andric
4150b57cec5SDimitry Andric // Deal with SC rather than Instructions.
4160b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
4170b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
4180b57cec5SDimitry Andric
4190b57cec5SDimitry Andric instr2instrSC(InsInstrs, InsInstrsSC);
4200b57cec5SDimitry Andric instr2instrSC(DelInstrs, DelInstrsSC);
4210b57cec5SDimitry Andric
4220b57cec5SDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
4230b57cec5SDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric // Compute new resource length.
4260b57cec5SDimitry Andric unsigned ResLenAfterCombine =
4270b57cec5SDimitry Andric BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
4280b57cec5SDimitry Andric
4290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
4300b57cec5SDimitry Andric << ResLenBeforeCombine
4310b57cec5SDimitry Andric << " and after: " << ResLenAfterCombine << "\n";);
4320b57cec5SDimitry Andric LLVM_DEBUG(
4335ffd83dbSDimitry Andric ResLenAfterCombine <=
4345ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit()
4350b57cec5SDimitry Andric ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
4360b57cec5SDimitry Andric : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
4370b57cec5SDimitry Andric "Length\n");
4380b57cec5SDimitry Andric
4395ffd83dbSDimitry Andric return ResLenAfterCombine <=
4405ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit();
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric
4430b57cec5SDimitry Andric /// \returns true when new instruction sequence should be generated
4440b57cec5SDimitry Andric /// independent if it lengthens critical path or not
doSubstitute(unsigned NewSize,unsigned OldSize,bool OptForSize)445480093f4SDimitry Andric bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
446480093f4SDimitry Andric bool OptForSize) {
447480093f4SDimitry Andric if (OptForSize && (NewSize < OldSize))
4480b57cec5SDimitry Andric return true;
4490b57cec5SDimitry Andric if (!TSchedModel.hasInstrSchedModelOrItineraries())
4500b57cec5SDimitry Andric return true;
4510b57cec5SDimitry Andric return false;
4520b57cec5SDimitry Andric }
4530b57cec5SDimitry Andric
4540b57cec5SDimitry Andric /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
4550b57cec5SDimitry Andric /// depths if requested.
4560b57cec5SDimitry Andric ///
4570b57cec5SDimitry Andric /// \param MBB basic block to insert instructions in
4580b57cec5SDimitry Andric /// \param MI current machine instruction
4590b57cec5SDimitry Andric /// \param InsInstrs new instructions to insert in \p MBB
4600b57cec5SDimitry Andric /// \param DelInstrs instruction to delete from \p MBB
4610b57cec5SDimitry Andric /// \param MinInstr is a pointer to the machine trace information
4620b57cec5SDimitry Andric /// \param RegUnits set of live registers, needed to compute instruction depths
463*af732203SDimitry Andric /// \param TII is target instruction info, used to call target hook
464*af732203SDimitry Andric /// \param Pattern is used to call target hook finalizeInsInstrs
4650b57cec5SDimitry Andric /// \param IncrementalUpdate if true, compute instruction depths incrementally,
4660b57cec5SDimitry Andric /// otherwise invalidate the trace
insertDeleteInstructions(MachineBasicBlock * MBB,MachineInstr & MI,SmallVector<MachineInstr *,16> InsInstrs,SmallVector<MachineInstr *,16> DelInstrs,MachineTraceMetrics::Ensemble * MinInstr,SparseSet<LiveRegUnit> & RegUnits,const TargetInstrInfo * TII,MachineCombinerPattern Pattern,bool IncrementalUpdate)4670b57cec5SDimitry Andric static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
4680b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs,
4690b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs,
4700b57cec5SDimitry Andric MachineTraceMetrics::Ensemble *MinInstr,
4710b57cec5SDimitry Andric SparseSet<LiveRegUnit> &RegUnits,
472*af732203SDimitry Andric const TargetInstrInfo *TII,
473*af732203SDimitry Andric MachineCombinerPattern Pattern,
4740b57cec5SDimitry Andric bool IncrementalUpdate) {
475*af732203SDimitry Andric // If we want to fix up some placeholder for some target, do it now.
476*af732203SDimitry Andric // We need this because in genAlternativeCodeSequence, we have not decided the
477*af732203SDimitry Andric // better pattern InsInstrs or DelInstrs, so we don't want generate some
478*af732203SDimitry Andric // sideeffect to the function. For example we need to delay the constant pool
479*af732203SDimitry Andric // entry creation here after InsInstrs is selected as better pattern.
480*af732203SDimitry Andric // Otherwise the constant pool entry created for InsInstrs will not be deleted
481*af732203SDimitry Andric // even if InsInstrs is not the better pattern.
482*af732203SDimitry Andric TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
483*af732203SDimitry Andric
4840b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
4850b57cec5SDimitry Andric MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
4860b57cec5SDimitry Andric
4870b57cec5SDimitry Andric for (auto *InstrPtr : DelInstrs) {
4880b57cec5SDimitry Andric InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
4890b57cec5SDimitry Andric // Erase all LiveRegs defined by the removed instruction
4900b57cec5SDimitry Andric for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
4910b57cec5SDimitry Andric if (I->MI == InstrPtr)
4920b57cec5SDimitry Andric I = RegUnits.erase(I);
4930b57cec5SDimitry Andric else
4940b57cec5SDimitry Andric I++;
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric }
4970b57cec5SDimitry Andric
4980b57cec5SDimitry Andric if (IncrementalUpdate)
4990b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
5000b57cec5SDimitry Andric MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
5010b57cec5SDimitry Andric else
5020b57cec5SDimitry Andric MinInstr->invalidate(MBB);
5030b57cec5SDimitry Andric
5040b57cec5SDimitry Andric NumInstCombined++;
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric
5070b57cec5SDimitry Andric // Check that the difference between original and new latency is decreasing for
5080b57cec5SDimitry Andric // later patterns. This helps to discover sub-optimal pattern orderings.
verifyPatternOrder(MachineBasicBlock * MBB,MachineInstr & Root,SmallVector<MachineCombinerPattern,16> & Patterns)5090b57cec5SDimitry Andric void MachineCombiner::verifyPatternOrder(
5100b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineInstr &Root,
5110b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> &Patterns) {
5120b57cec5SDimitry Andric long PrevLatencyDiff = std::numeric_limits<long>::max();
5130b57cec5SDimitry Andric (void)PrevLatencyDiff; // Variable is used in assert only.
5140b57cec5SDimitry Andric for (auto P : Patterns) {
5150b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs;
5160b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs;
5170b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
5180b57cec5SDimitry Andric TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
5190b57cec5SDimitry Andric InstrIdxForVirtReg);
5200b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence.
5210b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized
5220b57cec5SDimitry Andric // in a single instruction.
5230b57cec5SDimitry Andric if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
5240b57cec5SDimitry Andric continue;
5250b57cec5SDimitry Andric
5260b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency;
5270b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
5280b57cec5SDimitry Andric Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
5290b57cec5SDimitry Andric long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
5300b57cec5SDimitry Andric assert(CurrentLatencyDiff <= PrevLatencyDiff &&
5310b57cec5SDimitry Andric "Current pattern is better than previous pattern.");
5320b57cec5SDimitry Andric PrevLatencyDiff = CurrentLatencyDiff;
5330b57cec5SDimitry Andric }
5340b57cec5SDimitry Andric }
5350b57cec5SDimitry Andric
5360b57cec5SDimitry Andric /// Substitute a slow code sequence with a faster one by
5370b57cec5SDimitry Andric /// evaluating instruction combining pattern.
5380b57cec5SDimitry Andric /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
5390b57cec5SDimitry Andric /// combining based on machine trace metrics. Only combine a sequence of
5400b57cec5SDimitry Andric /// instructions when this neither lengthens the critical path nor increases
5410b57cec5SDimitry Andric /// resource pressure. When optimizing for codesize always combine when the new
5420b57cec5SDimitry Andric /// sequence is shorter.
combineInstructions(MachineBasicBlock * MBB)5430b57cec5SDimitry Andric bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
5440b57cec5SDimitry Andric bool Changed = false;
5450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
5460b57cec5SDimitry Andric
5470b57cec5SDimitry Andric bool IncrementalUpdate = false;
5480b57cec5SDimitry Andric auto BlockIter = MBB->begin();
5490b57cec5SDimitry Andric decltype(BlockIter) LastUpdate;
5500b57cec5SDimitry Andric // Check if the block is in a loop.
5510b57cec5SDimitry Andric const MachineLoop *ML = MLI->getLoopFor(MBB);
5520b57cec5SDimitry Andric if (!MinInstr)
5530b57cec5SDimitry Andric MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
5540b57cec5SDimitry Andric
5550b57cec5SDimitry Andric SparseSet<LiveRegUnit> RegUnits;
5560b57cec5SDimitry Andric RegUnits.setUniverse(TRI->getNumRegUnits());
5570b57cec5SDimitry Andric
558480093f4SDimitry Andric bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
559480093f4SDimitry Andric
560*af732203SDimitry Andric bool DoRegPressureReduce =
561*af732203SDimitry Andric TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
562*af732203SDimitry Andric
5630b57cec5SDimitry Andric while (BlockIter != MBB->end()) {
5640b57cec5SDimitry Andric auto &MI = *BlockIter++;
5650b57cec5SDimitry Andric SmallVector<MachineCombinerPattern, 16> Patterns;
5660b57cec5SDimitry Andric // The motivating example is:
5670b57cec5SDimitry Andric //
5680b57cec5SDimitry Andric // MUL Other MUL_op1 MUL_op2 Other
5690b57cec5SDimitry Andric // \ / \ | /
5700b57cec5SDimitry Andric // ADD/SUB => MADD/MSUB
5710b57cec5SDimitry Andric // (=Root) (=NewRoot)
5720b57cec5SDimitry Andric
5730b57cec5SDimitry Andric // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
5740b57cec5SDimitry Andric // usually beneficial for code size it unfortunately can hurt performance
5750b57cec5SDimitry Andric // when the ADD is on the critical path, but the MUL is not. With the
5760b57cec5SDimitry Andric // substitution the MUL becomes part of the critical path (in form of the
5770b57cec5SDimitry Andric // MADD) and can lengthen it on architectures where the MADD latency is
5780b57cec5SDimitry Andric // longer than the ADD latency.
5790b57cec5SDimitry Andric //
5800b57cec5SDimitry Andric // For each instruction we check if it can be the root of a combiner
5810b57cec5SDimitry Andric // pattern. Then for each pattern the new code sequence in form of MI is
5820b57cec5SDimitry Andric // generated and evaluated. When the efficiency criteria (don't lengthen
5830b57cec5SDimitry Andric // critical path, don't use more resources) is met the new sequence gets
5840b57cec5SDimitry Andric // hooked up into the basic block before the old sequence is removed.
5850b57cec5SDimitry Andric //
5860b57cec5SDimitry Andric // The algorithm does not try to evaluate all patterns and pick the best.
5870b57cec5SDimitry Andric // This is only an artificial restriction though. In practice there is
5880b57cec5SDimitry Andric // mostly one pattern, and getMachineCombinerPatterns() can order patterns
5890b57cec5SDimitry Andric // based on an internal cost heuristic. If
5900b57cec5SDimitry Andric // machine-combiner-verify-pattern-order is enabled, all patterns are
5910b57cec5SDimitry Andric // checked to ensure later patterns do not provide better latency savings.
5920b57cec5SDimitry Andric
593*af732203SDimitry Andric if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
5940b57cec5SDimitry Andric continue;
5950b57cec5SDimitry Andric
5960b57cec5SDimitry Andric if (VerifyPatternOrder)
5970b57cec5SDimitry Andric verifyPatternOrder(MBB, MI, Patterns);
5980b57cec5SDimitry Andric
5990b57cec5SDimitry Andric for (auto P : Patterns) {
6000b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs;
6010b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs;
6020b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
6030b57cec5SDimitry Andric TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
6040b57cec5SDimitry Andric InstrIdxForVirtReg);
6050b57cec5SDimitry Andric unsigned NewInstCount = InsInstrs.size();
6060b57cec5SDimitry Andric unsigned OldInstCount = DelInstrs.size();
6070b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence.
6080b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized
6090b57cec5SDimitry Andric // in a single instruction.
6100b57cec5SDimitry Andric if (!NewInstCount)
6110b57cec5SDimitry Andric continue;
6120b57cec5SDimitry Andric
6130b57cec5SDimitry Andric LLVM_DEBUG(if (dump_intrs) {
6140b57cec5SDimitry Andric dbgs() << "\tFor the Pattern (" << (int)P
6150b57cec5SDimitry Andric << ") these instructions could be removed\n";
6160b57cec5SDimitry Andric for (auto const *InstrPtr : DelInstrs)
6170b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6180b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6190b57cec5SDimitry Andric dbgs() << "\tThese instructions could replace the removed ones\n";
6200b57cec5SDimitry Andric for (auto const *InstrPtr : InsInstrs)
6210b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6220b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6230b57cec5SDimitry Andric });
6240b57cec5SDimitry Andric
6250b57cec5SDimitry Andric bool SubstituteAlways = false;
6260b57cec5SDimitry Andric if (ML && TII->isThroughputPattern(P))
6270b57cec5SDimitry Andric SubstituteAlways = true;
6280b57cec5SDimitry Andric
629*af732203SDimitry Andric if (IncrementalUpdate && LastUpdate != BlockIter) {
6300b57cec5SDimitry Andric // Update depths since the last incremental update.
6310b57cec5SDimitry Andric MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
6320b57cec5SDimitry Andric LastUpdate = BlockIter;
6330b57cec5SDimitry Andric }
6340b57cec5SDimitry Andric
635*af732203SDimitry Andric if (DoRegPressureReduce &&
636*af732203SDimitry Andric getCombinerObjective(P) ==
637*af732203SDimitry Andric CombinerObjective::MustReduceRegisterPressure) {
638*af732203SDimitry Andric if (MBB->size() > inc_threshold) {
639*af732203SDimitry Andric // Use incremental depth updates for basic blocks above threshold
640*af732203SDimitry Andric IncrementalUpdate = true;
641*af732203SDimitry Andric LastUpdate = BlockIter;
642*af732203SDimitry Andric }
643*af732203SDimitry Andric if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
644*af732203SDimitry Andric // Replace DelInstrs with InsInstrs.
645*af732203SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
646*af732203SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
647*af732203SDimitry Andric Changed |= true;
648*af732203SDimitry Andric
649*af732203SDimitry Andric // Go back to previous instruction as it may have ILP reassociation
650*af732203SDimitry Andric // opportunity.
651*af732203SDimitry Andric BlockIter--;
652*af732203SDimitry Andric break;
653*af732203SDimitry Andric }
654*af732203SDimitry Andric }
655*af732203SDimitry Andric
6560b57cec5SDimitry Andric // Substitute when we optimize for codesize and the new sequence has
6570b57cec5SDimitry Andric // fewer instructions OR
6580b57cec5SDimitry Andric // the new sequence neither lengthens the critical path nor increases
6590b57cec5SDimitry Andric // resource pressure.
660480093f4SDimitry Andric if (SubstituteAlways ||
661480093f4SDimitry Andric doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
6620b57cec5SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
663*af732203SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
6640b57cec5SDimitry Andric // Eagerly stop after the first pattern fires.
6650b57cec5SDimitry Andric Changed = true;
6660b57cec5SDimitry Andric break;
6670b57cec5SDimitry Andric } else {
6680b57cec5SDimitry Andric // For big basic blocks, we only compute the full trace the first time
6690b57cec5SDimitry Andric // we hit this. We do not invalidate the trace, but instead update the
6700b57cec5SDimitry Andric // instruction depths incrementally.
6710b57cec5SDimitry Andric // NOTE: Only the instruction depths up to MI are accurate. All other
6720b57cec5SDimitry Andric // trace information is not updated.
6730b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
6740b57cec5SDimitry Andric Traces->verifyAnalysis();
6750b57cec5SDimitry Andric if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
6760b57cec5SDimitry Andric InstrIdxForVirtReg, P,
6770b57cec5SDimitry Andric !IncrementalUpdate) &&
6780b57cec5SDimitry Andric preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
6790b57cec5SDimitry Andric if (MBB->size() > inc_threshold) {
6800b57cec5SDimitry Andric // Use incremental depth updates for basic blocks above treshold
6810b57cec5SDimitry Andric IncrementalUpdate = true;
6820b57cec5SDimitry Andric LastUpdate = BlockIter;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric
6850b57cec5SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
686*af732203SDimitry Andric RegUnits, TII, P, IncrementalUpdate);
6870b57cec5SDimitry Andric
6880b57cec5SDimitry Andric // Eagerly stop after the first pattern fires.
6890b57cec5SDimitry Andric Changed = true;
6900b57cec5SDimitry Andric break;
6910b57cec5SDimitry Andric }
6920b57cec5SDimitry Andric // Cleanup instructions of the alternative code sequence. There is no
6930b57cec5SDimitry Andric // use for them.
6940b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent();
6950b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs)
6960b57cec5SDimitry Andric MF->DeleteMachineInstr(InstrPtr);
6970b57cec5SDimitry Andric }
6980b57cec5SDimitry Andric InstrIdxForVirtReg.clear();
6990b57cec5SDimitry Andric }
7000b57cec5SDimitry Andric }
7010b57cec5SDimitry Andric
7020b57cec5SDimitry Andric if (Changed && IncrementalUpdate)
7030b57cec5SDimitry Andric Traces->invalidate(MBB);
7040b57cec5SDimitry Andric return Changed;
7050b57cec5SDimitry Andric }
7060b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)7070b57cec5SDimitry Andric bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
7080b57cec5SDimitry Andric STI = &MF.getSubtarget();
7090b57cec5SDimitry Andric TII = STI->getInstrInfo();
7100b57cec5SDimitry Andric TRI = STI->getRegisterInfo();
7110b57cec5SDimitry Andric SchedModel = STI->getSchedModel();
7120b57cec5SDimitry Andric TSchedModel.init(STI);
7130b57cec5SDimitry Andric MRI = &MF.getRegInfo();
7140b57cec5SDimitry Andric MLI = &getAnalysis<MachineLoopInfo>();
7150b57cec5SDimitry Andric Traces = &getAnalysis<MachineTraceMetrics>();
716480093f4SDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
717480093f4SDimitry Andric MBFI = (PSI && PSI->hasProfileSummary()) ?
718480093f4SDimitry Andric &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
719480093f4SDimitry Andric nullptr;
7200b57cec5SDimitry Andric MinInstr = nullptr;
7210b57cec5SDimitry Andric OptSize = MF.getFunction().hasOptSize();
722*af732203SDimitry Andric RegClassInfo.runOnMachineFunction(MF);
7230b57cec5SDimitry Andric
7240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
7250b57cec5SDimitry Andric if (!TII->useMachineCombiner()) {
7260b57cec5SDimitry Andric LLVM_DEBUG(
7270b57cec5SDimitry Andric dbgs()
7280b57cec5SDimitry Andric << " Skipping pass: Target does not support machine combiner\n");
7290b57cec5SDimitry Andric return false;
7300b57cec5SDimitry Andric }
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric bool Changed = false;
7330b57cec5SDimitry Andric
7340b57cec5SDimitry Andric // Try to combine instructions.
7350b57cec5SDimitry Andric for (auto &MBB : MF)
7360b57cec5SDimitry Andric Changed |= combineInstructions(&MBB);
7370b57cec5SDimitry Andric
7380b57cec5SDimitry Andric return Changed;
7390b57cec5SDimitry Andric }
740