//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The machine combiner pass uses machine trace metrics to ensure the combined
// instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "machine-combiner"

STATISTIC(NumInstCombined, "Number of machine instructions combined");

static cl::opt<unsigned>
inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
              cl::desc("Incremental depth computation will be used for basic "
                       "blocks with more instructions than this threshold."),
              cl::init(500));

namespace {
class MachineCombiner : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MCSchedModel SchedModel;
  MachineRegisterInfo *MRI;
  MachineLoopInfo *MLI; // Current MachineLoopInfo
  MachineTraceMetrics *Traces;
  MachineTraceMetrics::Ensemble *MinInstr;

  TargetSchedModel TSchedModel;

  /// True if optimizing for code size.
  bool OptSize;

public:
  static char ID;
  MachineCombiner() : MachineFunctionPass(ID) {
    initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;
  StringRef getPassName() const override { return "Machine InstCombiner"; }

private:
  bool doSubstitute(unsigned NewSize, unsigned OldSize);
  bool combineInstructions(MachineBasicBlock *);
  MachineInstr *getOperandDef(const MachineOperand &MO);
  unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                    MachineTraceMetrics::Trace BlockTrace);
  unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                      MachineTraceMetrics::Trace BlockTrace);
  bool
  improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
                          MachineTraceMetrics::Trace BlockTrace,
                          SmallVectorImpl<MachineInstr *> &InsInstrs,
                          SmallVectorImpl<MachineInstr *> &DelInstrs,
                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                          MachineCombinerPattern Pattern, bool SlackIsAccurate);
  bool preservesResourceLen(MachineBasicBlock *MBB,
                            MachineTraceMetrics::Trace BlockTrace,
                            SmallVectorImpl<MachineInstr *> &InsInstrs,
                            SmallVectorImpl<MachineInstr *> &DelInstrs);
  void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
                     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
};
}

char MachineCombiner::ID = 0;
char &llvm::MachineCombinerID = MachineCombiner::ID;

INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
                      "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
                    false, false)

void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<MachineTraceMetrics>();
  AU.addPreserved<MachineTraceMetrics>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
  MachineInstr *DefInstr = nullptr;
  // We need a virtual register definition.
  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    DefInstr = MRI->getUniqueVRegDef(MO.getReg());
  // PHIs have no depth or latency.
  if (DefInstr && DefInstr->isPHI())
    DefInstr = nullptr;
  return DefInstr;
}

/// Computes depth of instructions in vector \p InsInstrs.
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
/// of defining machine instruction in \p InsInstrs
/// \param BlockTrace is a trace of machine instructions
///
/// \returns Depth of last instruction in \p InsInstrs ("NewRoot")
unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                          MachineTraceMetrics::Trace BlockTrace) {
  SmallVector<unsigned, 16> InstrDepth;
  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
         "Missing machine model\n");

  // For each instruction in the new sequence compute the depth based on the
  // operands. Use the trace information when possible. For new operands which
  // are tracked in the InstrIdxForVirtReg map, the depth is looked up in
  // InstrDepth.
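  // Conceptually, for each new instruction I the loop below computes
  //   depth(I) = max over virtual register uses U of
  //                depth(def(U)) + operand latency from def(U) to I
  // where def(U) is either an earlier instruction in InsInstrs (found through
  // InstrIdxForVirtReg) or an instruction that is already part of the trace.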
  for (auto *InstrPtr : InsInstrs) { // for each Use
    unsigned IDepth = 0;
    DEBUG(dbgs() << "NEW INSTR ";
          InstrPtr->print(dbgs(), TII);
          dbgs() << "\n";);
    for (const MachineOperand &MO : InstrPtr->operands()) {
      // Check for virtual register operand.
      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
        continue;
      if (!MO.isUse())
        continue;
      unsigned DepthOp = 0;
      unsigned LatencyOp = 0;
      DenseMap<unsigned, unsigned>::iterator II =
          InstrIdxForVirtReg.find(MO.getReg());
      if (II != InstrIdxForVirtReg.end()) {
        // Operand is a new virtual register that is not in the trace.
        assert(II->second < InstrDepth.size() && "Bad Index");
        MachineInstr *DefInstr = InsInstrs[II->second];
        assert(DefInstr &&
               "There must be a definition for a new virtual register");
        DepthOp = InstrDepth[II->second];
        LatencyOp = TSchedModel.computeOperandLatency(
            DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
            InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
      } else {
        MachineInstr *DefInstr = getOperandDef(MO);
        if (DefInstr) {
          DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
          LatencyOp = TSchedModel.computeOperandLatency(
              DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
              InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
        }
      }
      IDepth = std::max(IDepth, DepthOp + LatencyOp);
    }
    InstrDepth.push_back(IDepth);
  }
  unsigned NewRootIdx = InsInstrs.size() - 1;
  return InstrDepth[NewRootIdx];
}

/// Computes instruction latency as max of latency of defined operands.
///
/// \param Root is a machine instruction that could be replaced by NewRoot.
/// It is used to compute more accurate latency information for NewRoot in
/// case there is a dependent instruction in the same trace (\p BlockTrace)
/// \param NewRoot is the instruction for which the latency is computed
/// \param BlockTrace is a trace of machine instructions
///
/// \returns Latency of \p NewRoot
unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                                     MachineTraceMetrics::Trace BlockTrace) {
  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
         "Missing machine model\n");

  // Check each definition in NewRoot and compute the latency.
  unsigned NewRootLatency = 0;

  for (const MachineOperand &MO : NewRoot->operands()) {
    // Check for virtual register operand.
    if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
      continue;
    if (!MO.isDef())
      continue;
    // Get the first instruction that uses MO.
    MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
    RI++;
    MachineInstr *UseMO = RI->getParent();
    unsigned LatencyOp = 0;
    if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
      LatencyOp = TSchedModel.computeOperandLatency(
          NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
          UseMO->findRegisterUseOperandIdx(MO.getReg()));
    } else {
      LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
    }
    NewRootLatency = std::max(NewRootLatency, LatencyOp);
  }
  return NewRootLatency;
}

/// The combiner's goal may differ based on which pattern it is attempting
/// to optimize.
enum class CombinerObjective {
  MustReduceDepth, // The data dependency chain must be improved.
  Default          // The critical path must not be lengthened.
};

static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
  // TODO: If C++ ever gets a real enum class, make this part of the
  // MachineCombinerPattern class.
  switch (P) {
  case MachineCombinerPattern::REASSOC_AX_BY:
  case MachineCombinerPattern::REASSOC_AX_YB:
  case MachineCombinerPattern::REASSOC_XA_BY:
  case MachineCombinerPattern::REASSOC_XA_YB:
    return CombinerObjective::MustReduceDepth;
  default:
    return CombinerObjective::Default;
  }
}

/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
/// path. The definition of "improve" may be restricted by specifying that the
/// new path improves the data dependency chain (MustReduceDepth).
bool MachineCombiner::improvesCriticalPathLen(
    MachineBasicBlock *MBB, MachineInstr *Root,
    MachineTraceMetrics::Trace BlockTrace,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
    MachineCombinerPattern Pattern,
    bool SlackIsAccurate) {
  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
         "Missing machine model\n");
  // NewRoot is the last instruction in the \p InsInstrs vector.
  unsigned NewRootIdx = InsInstrs.size() - 1;
  MachineInstr *NewRoot = InsInstrs[NewRootIdx];

  // Get depth and latency of NewRoot and Root.
  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
  unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;

  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
        dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
        dbgs() << " RootDepth: " << RootDepth << "\n");

  // For a transform such as reassociation, the cost equation is
  // conservatively calculated so that we must improve the depth (data
  // dependency cycles) in the critical path to proceed with the transform.
  // Being conservative also protects against inaccuracies in the underlying
  // machine trace metrics and CPU models.
  if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
    return NewRootDepth < RootDepth;

  // A more flexible cost calculation for the critical path includes the slack
  // of the original code sequence. This may allow the transform to proceed
  // even if the instruction depths (data dependency cycles) become worse.

  unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
  unsigned RootLatency = 0;

  for (auto I : DelInstrs)
    RootLatency += TSchedModel.computeInstrLatency(I);

  unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
  unsigned OldCycleCount = RootDepth + RootLatency +
                           (SlackIsAccurate ? RootSlack : 0);
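  // The new sequence is accepted below when it finishes no later than the
  // old one, i.e. when
  //   NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack
  // where the slack is only credited when it is known to be accurate.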
  DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
        dbgs() << " RootLatency: " << RootLatency << "\n";
        dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
               << SlackIsAccurate << "\n";
        dbgs() << " NewRootDepth + NewRootLatency = " << NewCycleCount << "\n";
        dbgs() << " RootDepth + RootLatency + RootSlack = " << OldCycleCount
               << "\n";);

  return NewCycleCount <= OldCycleCount;
}

/// Helper routine to convert instructions into their scheduling classes (SC).
void MachineCombiner::instr2instrSC(
    SmallVectorImpl<MachineInstr *> &Instrs,
    SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
  for (auto *InstrPtr : Instrs) {
    unsigned Opc = InstrPtr->getOpcode();
    unsigned Idx = TII->get(Opc).getSchedClass();
    const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
    InstrsSC.push_back(SC);
  }
}

/// True when the new instructions do not increase resource length.
bool MachineCombiner::preservesResourceLen(
    MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs) {
  if (!TSchedModel.hasInstrSchedModel())
    return true;

  // Compute current resource length.
  SmallVector<const MachineBasicBlock *, 1> MBBarr;
  MBBarr.push_back(MBB);
  unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);

  // Deal with SC rather than Instructions.
  SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
  SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;

  instr2instrSC(InsInstrs, InsInstrsSC);
  instr2instrSC(DelInstrs, DelInstrsSC);

  ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
  ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);

  // Compute new resource length.
  unsigned ResLenAfterCombine =
      BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);

  DEBUG(dbgs() << "RESOURCE DATA: \n";
        dbgs() << " resource len before: " << ResLenBeforeCombine
               << " after: " << ResLenAfterCombine << "\n";);

  return ResLenAfterCombine <= ResLenBeforeCombine;
}

/// \returns true when the new instruction sequence should be generated
/// regardless of whether it lengthens the critical path or not.
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
  if (OptSize && (NewSize < OldSize))
    return true;
  if (!TSchedModel.hasInstrSchedModelOrItineraries())
    return true;
  return false;
}

/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
/// depths if requested.
///
/// \param MBB basic block to insert instructions in
/// \param MI current machine instruction
/// \param InsInstrs new instructions to insert in \p MBB
/// \param DelInstrs instructions to delete from \p MBB
/// \param MinInstr is a pointer to the machine trace information
/// \param RegUnits set of live registers, needed to compute instruction depths
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
///        otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
                                     SmallVector<MachineInstr *, 16> InsInstrs,
                                     SmallVector<MachineInstr *, 16> DelInstrs,
                                     MachineTraceMetrics::Ensemble *MinInstr,
                                     SparseSet<LiveRegUnit> &RegUnits,
                                     bool IncrementalUpdate) {
  for (auto *InstrPtr : InsInstrs)
    MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);

  for (auto *InstrPtr : DelInstrs) {
    InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
    // Erase all LiveRegs defined by the removed instruction.
    for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
      if (I->MI == InstrPtr)
        I = RegUnits.erase(I);
      else
        I++;
    }
  }

  if (IncrementalUpdate)
    for (auto *InstrPtr : InsInstrs)
      MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
  else
    MinInstr->invalidate(MBB);

  NumInstCombined++;
}

/// Substitute a slow code sequence with a faster one by
/// evaluating instruction combining patterns.
/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
/// combining based on machine trace metrics. Only combine a sequence of
/// instructions when this neither lengthens the critical path nor increases
/// resource pressure. When optimizing for code size, always combine when the
/// new sequence is shorter.
bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
  bool Changed = false;
  DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");

  bool IncrementalUpdate = false;
  auto BlockIter = MBB->begin();
  decltype(BlockIter) LastUpdate;
  // Check if the block is in a loop.
  const MachineLoop *ML = MLI->getLoopFor(MBB);
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(TRI->getNumRegUnits());

  while (BlockIter != MBB->end()) {
    auto &MI = *BlockIter++;

    DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
    SmallVector<MachineCombinerPattern, 16> Patterns;
    // The motivating example is:
    //
    //     MUL  Other        MUL_op1 MUL_op2  Other
    //      \    /               \      |    /
    //      ADD/SUB      =>        MADD/MSUB
    //      (=Root)                (=NewRoot)
    //
    // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
    // usually beneficial for code size it unfortunately can hurt performance
    // when the ADD is on the critical path, but the MUL is not. With the
    // substitution the MUL becomes part of the critical path (in form of the
    // MADD) and can lengthen it on architectures where the MADD latency is
    // longer than the ADD latency.
    //
    // For each instruction we check if it can be the root of a combiner
    // pattern. Then for each pattern the new code sequence in form of MI is
    // generated and evaluated.
    // When the efficiency criteria (don't lengthen the critical path, don't
    // use more resources) are met the new sequence gets hooked up into the
    // basic block before the old sequence is removed.
    //
    // The algorithm does not try to evaluate all patterns and pick the best.
    // This is only an artificial restriction though. In practice there is
    // mostly one pattern, and getMachineCombinerPatterns() can order patterns
    // based on an internal cost heuristic.

    if (!TII->getMachineCombinerPatterns(MI, Patterns))
      continue;

    for (auto P : Patterns) {
      SmallVector<MachineInstr *, 16> InsInstrs;
      SmallVector<MachineInstr *, 16> DelInstrs;
      DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
      TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
                                      InstrIdxForVirtReg);
      unsigned NewInstCount = InsInstrs.size();
      unsigned OldInstCount = DelInstrs.size();
      // Found a pattern, but did not generate an alternative sequence.
      // This can happen e.g. when an immediate could not be materialized
      // in a single instruction.
      if (!NewInstCount)
        continue;

      bool SubstituteAlways = false;
      if (ML && TII->isThroughputPattern(P))
        SubstituteAlways = true;

      if (IncrementalUpdate) {
        // Update depths since the last incremental update.
        MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
        LastUpdate = BlockIter;
      }

      // Substitute when we optimize for code size and the new sequence has
      // fewer instructions OR
      // the new sequence neither lengthens the critical path nor increases
      // resource pressure.
      if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
        insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
                                 RegUnits, IncrementalUpdate);
        // Eagerly stop after the first pattern fires.
        Changed = true;
        break;
      } else {
        // For big basic blocks, we only compute the full trace the first time
        // we hit this. We do not invalidate the trace, but instead update the
        // instruction depths incrementally.
        // NOTE: Only the instruction depths up to MI are accurate. All other
        // trace information is not updated.
        MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
        Traces->verifyAnalysis();
        if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
                                    InstrIdxForVirtReg, P,
                                    !IncrementalUpdate) &&
            preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
          if (MBB->size() > inc_threshold) {
            // Use incremental depth updates for basic blocks above the
            // threshold.
            IncrementalUpdate = true;
            LastUpdate = BlockIter;
          }

          insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
                                   RegUnits, IncrementalUpdate);

          // Eagerly stop after the first pattern fires.
          Changed = true;
          break;
        }
        // Cleanup instructions of the alternative code sequence. There is no
        // use for them.
        MachineFunction *MF = MBB->getParent();
        for (auto *InstrPtr : InsInstrs)
          MF->DeleteMachineInstr(InstrPtr);
      }
      InstrIdxForVirtReg.clear();
    }
  }

  if (Changed && IncrementalUpdate)
    Traces->invalidate(MBB);
  return Changed;
}

bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  SchedModel = STI.getSchedModel();
  TSchedModel.init(SchedModel, &STI, TII);
  MRI = &MF.getRegInfo();
  MLI = &getAnalysis<MachineLoopInfo>();
  Traces = &getAnalysis<MachineTraceMetrics>();
  MinInstr = nullptr;
  OptSize = MF.getFunction()->optForSize();

  DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
  if (!TII->useMachineCombiner()) {
    DEBUG(dbgs()
          << "  Skipping pass: Target does not support machine combiner\n");
    return false;
  }

  bool Changed = false;

  // Try to combine instructions.
  for (auto &MBB : MF)
    Changed |= combineInstructions(&MBB);

  return Changed;
}