1 //===- MachineSink.cpp - Sinking for machine instructions -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass moves instructions into successor blocks when possible, so that
10 // they aren't executed on paths where their results aren't needed.
11 //
12 // This pass is not intended to be a replacement or a complete alternative
13 // for an LLVM-IR-level sinking pass. It is only designed to sink simple
14 // constructs that are not exposed before lowering and instruction selection.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DenseSet.h"
20 #include "llvm/ADT/PointerIntPair.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/SparseBitVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/AliasAnalysis.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
29 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
30 #include "llvm/CodeGen/MachineDominators.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineLoopInfo.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachinePostDominators.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/TargetSubtargetInfo.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugInfoMetadata.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/MC/MCRegisterInfo.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/BranchProbability.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <map>
55 #include <utility>
56 #include <vector>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "machine-sink"
61 
// Command-line knobs tuning the sinking heuristics below.

static cl::opt<bool>
SplitEdges("machine-sink-split",
           cl::desc("Split critical edges during machine sinking"),
           cl::init(true), cl::Hidden);

static cl::opt<bool>
UseBlockFreqInfo("machine-sink-bfi",
           cl::desc("Use block frequency info to find successors to sink"),
           cl::init(true), cl::Hidden);

// Probability (in percent) below which an edge is considered cold enough to
// split rather than speculating a single cheap instruction across it.
static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch threshold is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "splitted critical edge"),
    cl::init(40), cl::Hidden);

// Pass statistics, reported with -stats.
STATISTIC(NumSunk,      "Number of machine instructions sunk");
STATISTIC(NumSplit,     "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
85 
namespace {

  /// Machine-level sinking pass: moves instructions into successor blocks so
  /// that they are not executed on paths where their results are unused.
  class MachineSinking : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    MachineRegisterInfo  *MRI;     // Machine register information
    MachineDominatorTree *DT;      // Machine dominator tree
    MachinePostDominatorTree *PDT; // Machine post dominator tree
    MachineLoopInfo *LI;           // Loop info, used for backedge checks
    const MachineBlockFrequencyInfo *MBFI; // Null unless UseBlockFreqInfo
    const MachineBranchProbabilityInfo *MBPI;
    AliasAnalysis *AA;

    // Remember which edges have been considered for breaking.
    SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
    CEBCandidates;
    // Remember which edges we are about to split. Unlike CEBCandidates
    // (edges merely *considered*), the edges recorded here will actually be
    // split at the end of the current iteration of the main loop.
    SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

    // Registers whose kill flags must be conservatively cleared once the
    // whole function has been processed (sinking may have stretched their
    // live ranges past the flagged instruction).
    SparseBitVector<> RegsToClearKillFlags;

    // Per-block cache of successors, sorted by profitability; see
    // GetAllSortedSuccessors.
    using AllSuccsCache =
        std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

    /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
    /// post-dominated by another DBG_VALUE of the same variable location.
    /// This is necessary to detect sequences such as:
    ///     %0 = someinst
    ///     DBG_VALUE %0, !123, !DIExpression()
    ///     %1 = anotherinst
    ///     DBG_VALUE %1, !123, !DIExpression()
    /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as
    /// that would re-order assignments.
    using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

    /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
    /// debug instructions to sink.
    SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

    /// Record of debug variables that have had their locations set in the
    /// current block.
    DenseSet<DebugVariable> SeenDbgVars;

    /// A set of DBG_VALUEs, indexed by their DebugVariable. Refers to the
    /// original / non-sunk DBG_VALUE, which should be cloned to the final
    /// sunk location.
    using SinkingVarSet = DenseMap<DebugVariable, MachineInstr *>;

    /// Record of variable locations to re-create after the sinking of a
    /// vreg definition completes.
    struct SunkDebugDef {
      SinkingVarSet InstsToSink; /// Set of DBG_VALUEs to recreate.
      MachineInstr *MI;          /// Location to place DBG_VALUEs.
    };

    /// Map sunk vreg to the final destination of the vreg def, and the
    /// DBG_VALUEs that were attached to it. We need only create one new
    /// DBG_VALUE even if the defining instruction sinks through multiple
    /// blocks.
    DenseMap<unsigned, SunkDebugDef> SunkDebugDefs;

  public:
    static char ID; // Pass identification

    MachineSinking() : MachineFunctionPass(ID) {
      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<MachinePostDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineBranchProbabilityInfo>();
      AU.addPreserved<MachineLoopInfo>();
      // Block frequency info is optional; only request it when the heuristic
      // that consumes it is enabled.
      if (UseBlockFreqInfo)
        AU.addRequired<MachineBlockFrequencyInfo>();
    }

    void releaseMemory() override {
      CEBCandidates.clear();
    }

  private:
    bool ProcessBlock(MachineBasicBlock &MBB);
    void ProcessDbgInst(MachineInstr &MI);
    bool isWorthBreakingCriticalEdge(MachineInstr &MI,
                                     MachineBasicBlock *From,
                                     MachineBasicBlock *To);

    /// Postpone the splitting of the given critical
    /// edge (\p From, \p To).
    ///
    /// We do not split the edges on the fly. Indeed, this invalidates
    /// the dominance information and thus triggers a lot of updates
    /// of that information underneath.
    /// Instead, we postpone all the splits after each iteration of
    /// the main loop. That way, the information is at least valid
    /// for the lifetime of an iteration.
    ///
    /// \return True if the edge is marked as toSplit, false otherwise.
    /// False can be returned if, for instance, this is not profitable.
    bool PostponeSplitCriticalEdge(MachineInstr &MI,
                                   MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   bool BreakPHIEdge);
    bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                         AllSuccsCache &AllSuccessors);

    /// If we sink a COPY inst, some debug users of its destination may no
    /// longer be dominated by the COPY, and will eventually be dropped.
    /// This is easily rectified by forwarding the non-dominated debug uses
    /// to the copy source.
    void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                       MachineBasicBlock *TargetBlock);

    /// Re-insert any DBG_VALUE instructions that had their operands sunk
    /// in this function.
    void reinsertSunkDebugDefs(MachineFunction &MF);

    bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
                                 MachineBasicBlock *DefMBB,
                                 bool &BreakPHIEdge, bool &LocalUse) const;
    MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
               bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
    bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock *SuccToSinkTo,
                              AllSuccsCache &AllSuccessors);

    bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                         MachineBasicBlock *MBB);

    SmallVector<MachineBasicBlock *, 4> &
    GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                           AllSuccsCache &AllSuccessors) const;
  };

} // end anonymous namespace
230 
char MachineSinking::ID = 0;

// Out-of-line identity so clients can reference the pass (e.g. when building
// pass pipelines) without naming the anonymous-namespace class.
char &llvm::MachineSinkingID = MachineSinking::ID;

// Pass registration.
// NOTE(review): getAnalysisUsage also requires MachinePostDominatorTree (and
// MachineBlockFrequencyInfo when enabled), but neither is listed as an
// INITIALIZE_PASS_DEPENDENCY here — confirm this is intentional.
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                      "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                    "Machine code sinking", false, false)
243 
244 bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
245                                                      MachineBasicBlock *MBB) {
246   if (!MI.isCopy())
247     return false;
248 
249   Register SrcReg = MI.getOperand(1).getReg();
250   Register DstReg = MI.getOperand(0).getReg();
251   if (!Register::isVirtualRegister(SrcReg) ||
252       !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg))
253     return false;
254 
255   const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
256   const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
257   if (SRC != DRC)
258     return false;
259 
260   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
261   if (DefMI->isCopyLike())
262     return false;
263   LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
264   LLVM_DEBUG(dbgs() << "*** to: " << MI);
265   MRI->replaceRegWith(DstReg, SrcReg);
266   MI.eraseFromParent();
267 
268   // Conservatively, clear any kill flags, since it's possible that they are no
269   // longer correct.
270   MRI->clearKillFlags(SrcReg);
271 
272   ++NumCoalesces;
273   return true;
274 }
275 
276 /// AllUsesDominatedByBlock - Return true if all uses of the specified register
277 /// occur in blocks dominated by the specified block. If any use is in the
278 /// definition block, then return false since it is never legal to move def
279 /// after uses.
280 bool
281 MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
282                                         MachineBasicBlock *MBB,
283                                         MachineBasicBlock *DefMBB,
284                                         bool &BreakPHIEdge,
285                                         bool &LocalUse) const {
286   assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
287 
288   // Ignore debug uses because debug info doesn't affect the code.
289   if (MRI->use_nodbg_empty(Reg))
290     return true;
291 
292   // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
293   // into and they are all PHI nodes. In this case, machine-sink must break
294   // the critical edge first. e.g.
295   //
296   // %bb.1: derived from LLVM BB %bb4.preheader
297   //   Predecessors according to CFG: %bb.0
298   //     ...
299   //     %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
300   //     ...
301   //     JE_4 <%bb.37>, implicit %eflags
302   //   Successors according to CFG: %bb.37 %bb.2
303   //
304   // %bb.2: derived from LLVM BB %bb.nph
305   //   Predecessors according to CFG: %bb.0 %bb.1
306   //     %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
307   BreakPHIEdge = true;
308   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
309     MachineInstr *UseInst = MO.getParent();
310     unsigned OpNo = &MO - &UseInst->getOperand(0);
311     MachineBasicBlock *UseBlock = UseInst->getParent();
312     if (!(UseBlock == MBB && UseInst->isPHI() &&
313           UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) {
314       BreakPHIEdge = false;
315       break;
316     }
317   }
318   if (BreakPHIEdge)
319     return true;
320 
321   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
322     // Determine the block of the use.
323     MachineInstr *UseInst = MO.getParent();
324     unsigned OpNo = &MO - &UseInst->getOperand(0);
325     MachineBasicBlock *UseBlock = UseInst->getParent();
326     if (UseInst->isPHI()) {
327       // PHI nodes use the operand in the predecessor block, not the block with
328       // the PHI.
329       UseBlock = UseInst->getOperand(OpNo+1).getMBB();
330     } else if (UseBlock == DefMBB) {
331       LocalUse = true;
332       return false;
333     }
334 
335     // Check that it dominates.
336     if (!DT->dominates(MBB, UseBlock))
337       return false;
338   }
339 
340   return true;
341 }
342 
// Recreate, for each vreg whose definition was sunk, the DBG_VALUEs that
// referred to it, placing the clones immediately after the definition's
// final position so the variable locations follow the value.
void MachineSinking::reinsertSunkDebugDefs(MachineFunction &MF) {
  // Re-insert any DBG_VALUEs sunk.
  for (auto &Iterator : SunkDebugDefs) {
    SunkDebugDef Def = Iterator.second;
    unsigned VReg = Iterator.first;

    // Def.MI is the sunk defining instruction at its final destination.
    MachineBasicBlock::iterator DestLoc = Def.MI->getIterator();
    MachineBasicBlock *MBB = Def.MI->getParent();

    // Place DBG_VALUEs immediately after the sunk instruction.
    ++DestLoc;

    // Collect the DBG_VALUEs being sunk and put them in a deterministic order.
    // NOTE(review): the sort key is a (DebugVariable, MachineInstr*) pair;
    // ordering that involves raw pointer values is not deterministic across
    // runs — confirm the DebugVariable component alone fixes the order.
    using VarInstPair = std::pair<DebugVariable, MachineInstr *>;
    SmallVector<VarInstPair, 16> InstsToSink;
    for (auto &SunkLoc : Def.InstsToSink)
      InstsToSink.push_back(SunkLoc);
    llvm::sort(InstsToSink);

    // Clone each recorded DBG_VALUE and retarget its value operand at the
    // sunk vreg.
    for (auto &SunkLoc : InstsToSink) {
      MachineInstr *NewDbgMI = MF.CloneMachineInstr(SunkLoc.second);
      NewDbgMI->getOperand(0).setReg(VReg);
      MBB->insert(DestLoc, NewDbgMI);
    }
  }
}
369 
370 bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
371   if (skipFunction(MF.getFunction()))
372     return false;
373 
374   LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
375 
376   TII = MF.getSubtarget().getInstrInfo();
377   TRI = MF.getSubtarget().getRegisterInfo();
378   MRI = &MF.getRegInfo();
379   DT = &getAnalysis<MachineDominatorTree>();
380   PDT = &getAnalysis<MachinePostDominatorTree>();
381   LI = &getAnalysis<MachineLoopInfo>();
382   MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
383   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
384   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
385 
386   bool EverMadeChange = false;
387 
388   while (true) {
389     bool MadeChange = false;
390 
391     // Process all basic blocks.
392     CEBCandidates.clear();
393     ToSplit.clear();
394     for (auto &MBB: MF)
395       MadeChange |= ProcessBlock(MBB);
396 
397     // If we have anything we marked as toSplit, split it now.
398     for (auto &Pair : ToSplit) {
399       auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
400       if (NewSucc != nullptr) {
401         LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
402                           << printMBBReference(*Pair.first) << " -- "
403                           << printMBBReference(*NewSucc) << " -- "
404                           << printMBBReference(*Pair.second) << '\n');
405         MadeChange = true;
406         ++NumSplit;
407       } else
408         LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
409     }
410     // If this iteration over the code changed anything, keep iterating.
411     if (!MadeChange) break;
412     EverMadeChange = true;
413   }
414 
415   // Now clear any kill flags for recorded registers.
416   for (auto I : RegsToClearKillFlags)
417     MRI->clearKillFlags(I);
418   RegsToClearKillFlags.clear();
419 
420   reinsertSunkDebugDefs(MF);
421   SunkDebugDefs.clear();
422 
423   return EverMadeChange;
424 }
425 
426 bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
427   // Can't sink anything out of a block that has less than two successors.
428   if (MBB.succ_size() <= 1 || MBB.empty()) return false;
429 
430   // Don't bother sinking code out of unreachable blocks. In addition to being
431   // unprofitable, it can also lead to infinite looping, because in an
432   // unreachable loop there may be nowhere to stop.
433   if (!DT->isReachableFromEntry(&MBB)) return false;
434 
435   bool MadeChange = false;
436 
437   // Cache all successors, sorted by frequency info and loop depth.
438   AllSuccsCache AllSuccessors;
439 
440   // Walk the basic block bottom-up.  Remember if we saw a store.
441   MachineBasicBlock::iterator I = MBB.end();
442   --I;
443   bool ProcessedBegin, SawStore = false;
444   do {
445     MachineInstr &MI = *I;  // The instruction to sink.
446 
447     // Predecrement I (if it's not begin) so that it isn't invalidated by
448     // sinking.
449     ProcessedBegin = I == MBB.begin();
450     if (!ProcessedBegin)
451       --I;
452 
453     if (MI.isDebugInstr()) {
454       if (MI.isDebugValue())
455         ProcessDbgInst(MI);
456       continue;
457     }
458 
459     bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
460     if (Joined) {
461       MadeChange = true;
462       continue;
463     }
464 
465     if (SinkInstruction(MI, SawStore, AllSuccessors)) {
466       ++NumSunk;
467       MadeChange = true;
468     }
469 
470     // If we just processed the first instruction in the block, we're done.
471   } while (!ProcessedBegin);
472 
473   SeenDbgUsers.clear();
474   SeenDbgVars.clear();
475 
476   return MadeChange;
477 }
478 
479 void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
480   // When we see DBG_VALUEs for registers, record any vreg it reads, so that
481   // we know what to sink if the vreg def sinks.
482   assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
483 
484   DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
485                     MI.getDebugLoc()->getInlinedAt());
486   bool SeenBefore = SeenDbgVars.count(Var) != 0;
487 
488   MachineOperand &MO = MI.getOperand(0);
489   if (MO.isReg() && MO.getReg().isVirtual())
490     SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
491 
492   // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
493   SeenDbgVars.insert(Var);
494 }
495 
496 bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
497                                                  MachineBasicBlock *From,
498                                                  MachineBasicBlock *To) {
499   // FIXME: Need much better heuristics.
500 
501   // If the pass has already considered breaking this edge (during this pass
502   // through the function), then let's go ahead and break it. This means
503   // sinking multiple "cheap" instructions into the same block.
504   if (!CEBCandidates.insert(std::make_pair(From, To)).second)
505     return true;
506 
507   if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
508     return true;
509 
510   if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
511       BranchProbability(SplitEdgeProbabilityThreshold, 100))
512     return true;
513 
514   // MI is cheap, we probably don't want to break the critical edge for it.
515   // However, if this would allow some definitions of its source operands
516   // to be sunk then it's probably worth it.
517   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
518     const MachineOperand &MO = MI.getOperand(i);
519     if (!MO.isReg() || !MO.isUse())
520       continue;
521     Register Reg = MO.getReg();
522     if (Reg == 0)
523       continue;
524 
525     // We don't move live definitions of physical registers,
526     // so sinking their uses won't enable any opportunities.
527     if (Register::isPhysicalRegister(Reg))
528       continue;
529 
530     // If this instruction is the only user of a virtual register,
531     // check if breaking the edge will enable sinking
532     // both this instruction and the defining instruction.
533     if (MRI->hasOneNonDBGUse(Reg)) {
534       // If the definition resides in same MBB,
535       // claim it's likely we can sink these together.
536       // If definition resides elsewhere, we aren't
537       // blocking it from being sunk so don't break the edge.
538       MachineInstr *DefMI = MRI->getVRegDef(Reg);
539       if (DefMI->getParent() == MI.getParent())
540         return true;
541     }
542   }
543 
544   return false;
545 }
546 
547 bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
548                                                MachineBasicBlock *FromBB,
549                                                MachineBasicBlock *ToBB,
550                                                bool BreakPHIEdge) {
551   if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
552     return false;
553 
554   // Avoid breaking back edge. From == To means backedge for single BB loop.
555   if (!SplitEdges || FromBB == ToBB)
556     return false;
557 
558   // Check for backedges of more "complex" loops.
559   if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
560       LI->isLoopHeader(ToBB))
561     return false;
562 
563   // It's not always legal to break critical edges and sink the computation
564   // to the edge.
565   //
566   // %bb.1:
567   // v1024
568   // Beq %bb.3
569   // <fallthrough>
570   // %bb.2:
571   // ... no uses of v1024
572   // <fallthrough>
573   // %bb.3:
574   // ...
575   //       = v1024
576   //
577   // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
578   //
579   // %bb.1:
580   // ...
581   // Bne %bb.2
582   // %bb.4:
583   // v1024 =
584   // B %bb.3
585   // %bb.2:
586   // ... no uses of v1024
587   // <fallthrough>
588   // %bb.3:
589   // ...
590   //       = v1024
591   //
592   // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
593   // flow. We need to ensure the new basic block where the computation is
594   // sunk to dominates all the uses.
595   // It's only legal to break critical edge and sink the computation to the
596   // new block if all the predecessors of "To", except for "From", are
597   // not dominated by "From". Given SSA property, this means these
598   // predecessors are dominated by "To".
599   //
600   // There is no need to do this check if all the uses are PHI nodes. PHI
601   // sources are only defined on the specific predecessor edges.
602   if (!BreakPHIEdge) {
603     for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
604            E = ToBB->pred_end(); PI != E; ++PI) {
605       if (*PI == FromBB)
606         continue;
607       if (!DT->dominates(ToBB, *PI))
608         return false;
609     }
610   }
611 
612   ToSplit.insert(std::make_pair(FromBB, ToBB));
613 
614   return true;
615 }
616 
617 /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
618 bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
619                                           MachineBasicBlock *MBB,
620                                           MachineBasicBlock *SuccToSinkTo,
621                                           AllSuccsCache &AllSuccessors) {
622   assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
623 
624   if (MBB == SuccToSinkTo)
625     return false;
626 
627   // It is profitable if SuccToSinkTo does not post dominate current block.
628   if (!PDT->dominates(SuccToSinkTo, MBB))
629     return true;
630 
631   // It is profitable to sink an instruction from a deeper loop to a shallower
632   // loop, even if the latter post-dominates the former (PR21115).
633   if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
634     return true;
635 
636   // Check if only use in post dominated block is PHI instruction.
637   bool NonPHIUse = false;
638   for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
639     MachineBasicBlock *UseBlock = UseInst.getParent();
640     if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
641       NonPHIUse = true;
642   }
643   if (!NonPHIUse)
644     return true;
645 
646   // If SuccToSinkTo post dominates then also it may be profitable if MI
647   // can further profitably sinked into another block in next round.
648   bool BreakPHIEdge = false;
649   // FIXME - If finding successor is compile time expensive then cache results.
650   if (MachineBasicBlock *MBB2 =
651           FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
652     return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
653 
654   // If SuccToSinkTo is final destination and it is a post dominator of current
655   // block then it is not profitable to sink MI into SuccToSinkTo block.
656   return false;
657 }
658 
659 /// Get the sorted sequence of successors for this MachineBasicBlock, possibly
660 /// computing it if it was not already cached.
661 SmallVector<MachineBasicBlock *, 4> &
662 MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
663                                        AllSuccsCache &AllSuccessors) const {
664   // Do we have the sorted successors in cache ?
665   auto Succs = AllSuccessors.find(MBB);
666   if (Succs != AllSuccessors.end())
667     return Succs->second;
668 
669   SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
670                                                MBB->succ_end());
671 
672   // Handle cases where sinking can happen but where the sink point isn't a
673   // successor. For example:
674   //
675   //   x = computation
676   //   if () {} else {}
677   //   use x
678   //
679   const std::vector<MachineDomTreeNode *> &Children =
680     DT->getNode(MBB)->getChildren();
681   for (const auto &DTChild : Children)
682     // DomTree children of MBB that have MBB as immediate dominator are added.
683     if (DTChild->getIDom()->getBlock() == MI.getParent() &&
684         // Skip MBBs already added to the AllSuccs vector above.
685         !MBB->isSuccessor(DTChild->getBlock()))
686       AllSuccs.push_back(DTChild->getBlock());
687 
688   // Sort Successors according to their loop depth or block frequency info.
689   llvm::stable_sort(
690       AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
691         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
692         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
693         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
694         return HasBlockFreq ? LHSFreq < RHSFreq
695                             : LI->getLoopDepth(L) < LI->getLoopDepth(R);
696       });
697 
698   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
699 
700   return it.first->second;
701 }
702 
703 /// FindSuccToSinkTo - Find a successor to sink this instruction to.
704 MachineBasicBlock *
705 MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
706                                  bool &BreakPHIEdge,
707                                  AllSuccsCache &AllSuccessors) {
708   assert (MBB && "Invalid MachineBasicBlock!");
709 
710   // Loop over all the operands of the specified instruction.  If there is
711   // anything we can't handle, bail out.
712 
713   // SuccToSinkTo - This is the successor to sink this instruction to, once we
714   // decide.
715   MachineBasicBlock *SuccToSinkTo = nullptr;
716   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
717     const MachineOperand &MO = MI.getOperand(i);
718     if (!MO.isReg()) continue;  // Ignore non-register operands.
719 
720     Register Reg = MO.getReg();
721     if (Reg == 0) continue;
722 
723     if (Register::isPhysicalRegister(Reg)) {
724       if (MO.isUse()) {
725         // If the physreg has no defs anywhere, it's just an ambient register
726         // and we can freely move its uses. Alternatively, if it's allocatable,
727         // it could get allocated to something with a def during allocation.
728         if (!MRI->isConstantPhysReg(Reg))
729           return nullptr;
730       } else if (!MO.isDead()) {
731         // A def that isn't dead. We can't move it.
732         return nullptr;
733       }
734     } else {
735       // Virtual register uses are always safe to sink.
736       if (MO.isUse()) continue;
737 
738       // If it's not safe to move defs of the register class, then abort.
739       if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
740         return nullptr;
741 
742       // Virtual register defs can only be sunk if all their uses are in blocks
743       // dominated by one of the successors.
744       if (SuccToSinkTo) {
745         // If a previous operand picked a block to sink to, then this operand
746         // must be sinkable to the same block.
747         bool LocalUse = false;
748         if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
749                                      BreakPHIEdge, LocalUse))
750           return nullptr;
751 
752         continue;
753       }
754 
755       // Otherwise, we should look at all the successors and decide which one
756       // we should sink to. If we have reliable block frequency information
757       // (frequency != 0) available, give successors with smaller frequencies
758       // higher priority, otherwise prioritize smaller loop depths.
759       for (MachineBasicBlock *SuccBlock :
760            GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
761         bool LocalUse = false;
762         if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
763                                     BreakPHIEdge, LocalUse)) {
764           SuccToSinkTo = SuccBlock;
765           break;
766         }
767         if (LocalUse)
768           // Def is used locally, it's never safe to move this def.
769           return nullptr;
770       }
771 
772       // If we couldn't find a block to sink to, ignore this instruction.
773       if (!SuccToSinkTo)
774         return nullptr;
775       if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
776         return nullptr;
777     }
778   }
779 
780   // It is not possible to sink an instruction into its own block.  This can
781   // happen with loops.
782   if (MBB == SuccToSinkTo)
783     return nullptr;
784 
785   // It's not safe to sink instructions to EH landing pad. Control flow into
786   // landing pad is implicitly defined.
787   if (SuccToSinkTo && SuccToSinkTo->isEHPad())
788     return nullptr;
789 
790   return SuccToSinkTo;
791 }
792 
793 /// Return true if MI is likely to be usable as a memory operation by the
794 /// implicit null check optimization.
795 ///
796 /// This is a "best effort" heuristic, and should not be relied upon for
797 /// correctness.  This returning true does not guarantee that the implicit null
798 /// check optimization is legal over MI, and this returning false does not
799 /// guarantee MI cannot possibly be used to do a null check.
800 static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
801                                              const TargetInstrInfo *TII,
802                                              const TargetRegisterInfo *TRI) {
803   using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
804 
805   auto *MBB = MI.getParent();
806   if (MBB->pred_size() != 1)
807     return false;
808 
809   auto *PredMBB = *MBB->pred_begin();
810   auto *PredBB = PredMBB->getBasicBlock();
811 
812   // Frontends that don't use implicit null checks have no reason to emit
813   // branches with make.implicit metadata, and this function should always
814   // return false for them.
815   if (!PredBB ||
816       !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
817     return false;
818 
819   const MachineOperand *BaseOp;
820   int64_t Offset;
821   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
822     return false;
823 
824   if (!BaseOp->isReg())
825     return false;
826 
827   if (!(MI.mayLoad() && !MI.isPredicable()))
828     return false;
829 
830   MachineBranchPredicate MBP;
831   if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
832     return false;
833 
834   return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
835          (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
836           MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
837          MBP.LHS.getReg() == BaseOp->getReg();
838 }
839 
840 /// If the sunk instruction is a copy, try to forward the copy instead of
841 /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
842 /// there's any subregister weirdness involved. Returns true if copy
843 /// propagation occurred.
844 static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
845   const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
846   const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
847 
848   // Copy DBG_VALUE operand and set the original to undef. We then check to
849   // see whether this is something that can be copy-forwarded. If it isn't,
850   // continue around the loop.
851   MachineOperand DbgMO = DbgMI.getOperand(0);
852 
853   const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
854   auto CopyOperands = TII.isCopyInstr(SinkInst);
855   if (!CopyOperands)
856     return false;
857   SrcMO = CopyOperands->Source;
858   DstMO = CopyOperands->Destination;
859 
860   // Check validity of forwarding this copy.
861   bool PostRA = MRI.getNumVirtRegs() == 0;
862 
863   // Trying to forward between physical and virtual registers is too hard.
864   if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual())
865     return false;
866 
867   // Only try virtual register copy-forwarding before regalloc, and physical
868   // register copy-forwarding after regalloc.
869   bool arePhysRegs = !DbgMO.getReg().isVirtual();
870   if (arePhysRegs != PostRA)
871     return false;
872 
873   // Pre-regalloc, only forward if all subregisters agree (or there are no
874   // subregs at all). More analysis might recover some forwardable copies.
875   if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() ||
876                   DbgMO.getSubReg() != DstMO->getSubReg()))
877     return false;
878 
879   // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
880   // of this copy. Only forward the copy if the DBG_VALUE operand exactly
881   // matches the copy destination.
882   if (PostRA && DbgMO.getReg() != DstMO->getReg())
883     return false;
884 
885   DbgMI.getOperand(0).setReg(SrcMO->getReg());
886   DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg());
887   return true;
888 }
889 
890 /// Sink an instruction and its associated debug instructions.
891 static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
892                         MachineBasicBlock::iterator InsertPos,
893                         SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
894 
895   // If we cannot find a location to use (merge with), then we erase the debug
896   // location to prevent debug-info driven tools from potentially reporting
897   // wrong location information.
898   if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
899     MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
900                                                  InsertPos->getDebugLoc()));
901   else
902     MI.setDebugLoc(DebugLoc());
903 
904   // Move the instruction.
905   MachineBasicBlock *ParentBlock = MI.getParent();
906   SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
907                       ++MachineBasicBlock::iterator(MI));
908 
909   // Sink a copy of debug users to the insert position. Mark the original
910   // DBG_VALUE location as 'undef', indicating that any earlier variable
911   // location should be terminated as we've optimised away the value at this
912   // point.
913   for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
914                                                  DBE = DbgValuesToSink.end();
915        DBI != DBE; ++DBI) {
916     MachineInstr *DbgMI = *DBI;
917     MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI);
918     SuccToSinkTo.insert(InsertPos, NewDbgMI);
919 
920     if (!attemptDebugCopyProp(MI, *DbgMI))
921       DbgMI->getOperand(0).setReg(0);
922   }
923 }
924 
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
///
/// \param SawStore in/out flag threaded through MI.isSafeToMove's alias
///        checks; tracks whether a store has been seen in this block.
/// \param AllSuccessors per-block cache of sorted successor lists.
/// \returns true if MI was actually sunk.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks.  This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block.  This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  // BreakPHIEdge is set by FindSuccToSinkTo when all uses in the chosen
  // successor are PHIs, in which case the edge must be split first.
  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (!MO.isReg()) continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Register::isPhysicalRegister(Reg))
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    // Re-check safety with SawStore pessimistically set to true: a load must
    // not cross the edge if a store could be reached on another path.
    bool store = true;
    if (!MI.isSafeToMove(AA, store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a loop.
    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
        PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunken into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
    ++InsertPos;

  // Collect debug users of any vreg that this inst defines.
  for (auto &MO : MI.operands()) {
    if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
      continue;

    // If no DBG_VALUE uses this reg, skip further analysis.
    if (!SeenDbgUsers.count(MO.getReg()))
      continue;

    // Note the sinking instruction in the pass-level SunkDebugDefs map
    // (declared outside this chunk) so that debug users can be re-emitted
    // once sinking completes.
    SunkDebugDef &SunkDef = SunkDebugDefs[MO.getReg()];
    SunkDef.MI = &MI;

    // Record that these DBG_VALUEs refer to this sinking vreg, so that they
    // can be re-specified after sinking completes. Try copy-propagating
    // the original location too.
    auto &Users = SeenDbgUsers[MO.getReg()];
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      DebugVariable Var(DbgMI->getDebugVariable(), DbgMI->getDebugExpression(),
                        DbgMI->getDebugLoc()->getInlinedAt());
      // If we can't copy-propagate the original DBG_VALUE, mark it undef, as
      // its operand will not be available.
      if (!attemptDebugCopyProp(MI, *DbgMI))
        DbgMI->getOperand(0).setReg(0);

      // Debug users that don't pass any other DBG_VALUEs for this variable
      // can be sunk.
      if (!User.getInt())
        SunkDef.InstsToSink.insert({Var, DbgMI});
    }
  }

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  // No DBG_VALUEs are passed to performSink here; they were recorded above
  // in SunkDef.InstsToSink for separate handling instead.
  SmallVector<MachineInstr *, 4> DbgUsersToSink; // Deliberately empty
  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
  }

  return true;
}
1092 
1093 void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
1094     MachineInstr &MI, MachineBasicBlock *TargetBlock) {
1095   assert(MI.isCopy());
1096   assert(MI.getOperand(1).isReg());
1097 
1098   // Enumerate all users of vreg operands that are def'd. Skip those that will
1099   // be sunk. For the rest, if they are not dominated by the block we will sink
1100   // MI into, propagate the copy source to them.
1101   SmallVector<MachineInstr *, 4> DbgDefUsers;
1102   const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1103   for (auto &MO : MI.operands()) {
1104     if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
1105       continue;
1106     for (auto &User : MRI.use_instructions(MO.getReg())) {
1107       if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
1108         continue;
1109 
1110       // If is in same block, will either sink or be use-before-def.
1111       if (User.getParent() == MI.getParent())
1112         continue;
1113 
1114       assert(User.getOperand(0).isReg() &&
1115              "DBG_VALUE user of vreg, but non reg operand?");
1116       DbgDefUsers.push_back(&User);
1117     }
1118   }
1119 
1120   // Point the users of this copy that are no longer dominated, at the source
1121   // of the copy.
1122   for (auto *User : DbgDefUsers) {
1123     User->getOperand(0).setReg(MI.getOperand(1).getReg());
1124     User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg());
1125   }
1126 }
1127 
1128 //===----------------------------------------------------------------------===//
1129 // This pass is not intended to be a replacement or a complete alternative
1130 // for the pre-ra machine sink pass. It is only designed to sink COPY
1131 // instructions which should be handled after RA.
1132 //
1133 // This pass sinks COPY instructions into a successor block, if the COPY is not
1134 // used in the current block and the COPY is live-in to a single successor
1135 // (i.e., doesn't require the COPY to be duplicated).  This avoids executing the
1136 // copy on paths where their results aren't needed.  This also exposes
// additional opportunities for dead copy elimination and shrink wrapping.
1138 //
1139 // These copies were either not handled by or are inserted after the MachineSink
1140 // pass. As an example of the former case, the MachineSink pass cannot sink
// COPY instructions with allocatable source registers; for AArch64, these types
// of copy instructions are frequently used to move function parameters (PhyReg)
1143 // into virtual registers in the entry block.
1144 //
1145 // For the machine IR below, this pass will sink %w19 in the entry into its
1146 // successor (%bb.1) because %w19 is only live-in in %bb.1.
1147 // %bb.0:
1148 //   %wzr = SUBSWri %w1, 1
1149 //   %w19 = COPY %w0
1150 //   Bcc 11, %bb.2
1151 // %bb.1:
1152 //   Live Ins: %w19
1153 //   BL @fun
1154 //   %w0 = ADDWrr %w0, %w19
1155 //   RET %w0
1156 // %bb.2:
1157 //   %w0 = COPY %wzr
1158 //   RET %w0
1159 // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
1160 // able to see %bb.0 as a candidate.
1161 //===----------------------------------------------------------------------===//
namespace {

/// Post-register-allocation machine sinking pass: sinks COPY instructions
/// into successor blocks (see the section comment above for the rationale
/// and an example).
class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// Pass identification; its address is the unique token.
  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Copies are only moved between existing blocks; no CFG edits happen.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Runs strictly after register allocation, so require NoVRegs.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches.
  DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs;

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
} // namespace
1196 
// Pass identification; the address of ID is used as the unique token, so the
// stored value itself is irrelevant.
char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

// Register the pass with the legacy pass manager under "postra-machine-sink".
INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)
1202 
1203 static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
1204                                   const TargetRegisterInfo *TRI) {
1205   LiveRegUnits LiveInRegUnits(*TRI);
1206   LiveInRegUnits.addLiveIns(MBB);
1207   return !LiveInRegUnits.available(Reg);
1208 }
1209 
1210 static MachineBasicBlock *
1211 getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
1212                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1213                       unsigned Reg, const TargetRegisterInfo *TRI) {
1214   // Try to find a single sinkable successor in which Reg is live-in.
1215   MachineBasicBlock *BB = nullptr;
1216   for (auto *SI : SinkableBBs) {
1217     if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
1218       // If BB is set here, Reg is live-in to at least two sinkable successors,
1219       // so quit.
1220       if (BB)
1221         return nullptr;
1222       BB = SI;
1223     }
1224   }
1225   // Reg is not live-in to any sinkable successors.
1226   if (!BB)
1227     return nullptr;
1228 
1229   // Check if any register aliased with Reg is live-in in other successors.
1230   for (auto *SI : CurBB.successors()) {
1231     if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
1232       return nullptr;
1233   }
1234   return BB;
1235 }
1236 
1237 static MachineBasicBlock *
1238 getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
1239                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1240                       ArrayRef<unsigned> DefedRegsInCopy,
1241                       const TargetRegisterInfo *TRI) {
1242   MachineBasicBlock *SingleBB = nullptr;
1243   for (auto DefReg : DefedRegsInCopy) {
1244     MachineBasicBlock *BB =
1245         getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
1246     if (!BB || (SingleBB && SingleBB != BB))
1247       return nullptr;
1248     SingleBB = BB;
1249   }
1250   return SingleBB;
1251 }
1252 
1253 static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
1254                            SmallVectorImpl<unsigned> &UsedOpsInCopy,
1255                            LiveRegUnits &UsedRegUnits,
1256                            const TargetRegisterInfo *TRI) {
1257   for (auto U : UsedOpsInCopy) {
1258     MachineOperand &MO = MI->getOperand(U);
1259     Register SrcReg = MO.getReg();
1260     if (!UsedRegUnits.available(SrcReg)) {
1261       MachineBasicBlock::iterator NI = std::next(MI->getIterator());
1262       for (MachineInstr &UI : make_range(NI, CurBB.end())) {
1263         if (UI.killsRegister(SrcReg, TRI)) {
1264           UI.clearRegisterKills(SrcReg, TRI);
1265           MO.setIsKill(true);
1266           break;
1267         }
1268       }
1269     }
1270   }
1271 }
1272 
1273 static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
1274                          SmallVectorImpl<unsigned> &UsedOpsInCopy,
1275                          SmallVectorImpl<unsigned> &DefedRegsInCopy) {
1276   MachineFunction &MF = *SuccBB->getParent();
1277   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1278   for (unsigned DefReg : DefedRegsInCopy)
1279     for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
1280       SuccBB->removeLiveIn(*S);
1281   for (auto U : UsedOpsInCopy) {
1282     Register Reg = MI->getOperand(U).getReg();
1283     if (!SuccBB->isLiveIn(Reg))
1284       SuccBB->addLiveIn(Reg);
1285   }
1286 }
1287 
1288 static bool hasRegisterDependency(MachineInstr *MI,
1289                                   SmallVectorImpl<unsigned> &UsedOpsInCopy,
1290                                   SmallVectorImpl<unsigned> &DefedRegsInCopy,
1291                                   LiveRegUnits &ModifiedRegUnits,
1292                                   LiveRegUnits &UsedRegUnits) {
1293   bool HasRegDependency = false;
1294   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1295     MachineOperand &MO = MI->getOperand(i);
1296     if (!MO.isReg())
1297       continue;
1298     Register Reg = MO.getReg();
1299     if (!Reg)
1300       continue;
1301     if (MO.isDef()) {
1302       if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
1303         HasRegDependency = true;
1304         break;
1305       }
1306       DefedRegsInCopy.push_back(Reg);
1307 
1308       // FIXME: instead of isUse(), readsReg() would be a better fix here,
1309       // For example, we can ignore modifications in reg with undef. However,
1310       // it's not perfectly clear if skipping the internal read is safe in all
1311       // other targets.
1312     } else if (MO.isUse()) {
1313       if (!ModifiedRegUnits.available(Reg)) {
1314         HasRegDependency = true;
1315         break;
1316       }
1317       UsedOpsInCopy.push_back(i);
1318     }
1319   }
1320   return HasRegDependency;
1321 }
1322 
1323 static SmallSet<unsigned, 4> getRegUnits(unsigned Reg,
1324                                          const TargetRegisterInfo *TRI) {
1325   SmallSet<unsigned, 4> RegUnits;
1326   for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
1327     RegUnits.insert(*RI);
1328   return RegUnits;
1329 }
1330 
/// Walk CurBB bottom-up and sink renamable COPY instructions into the single
/// successor in which all of their defined registers are live-in. Returns
/// true if any copy was moved.
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
    MachineInstr *MI = &*I;
    // Advance the iterator first: MI may be spliced out of CurBB below.
    ++I;

    // Track the operand index for use in Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defed in Copy.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI->isDebugValue()) {
      auto &MO = MI->getOperand(0);
      if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
        // Bail if we can already tell the sink would be rejected, rather
        // than needlessly accumulating lots of DBG_VALUEs.
        if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
                                  ModifiedRegUnits, UsedRegUnits))
          continue;

        // Record debug use of each reg unit.
        SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
        for (unsigned Reg : Units)
          SeenDbgInstrs[Reg].push_back(MI);
      }
      continue;
    }

    if (MI->isDebugInstr())
      continue;

    // Do not move any instruction across function call.
    // (This aborts the whole block: liveness across the call is not tracked.)
    if (MI->isCall())
      return false;

    // Only renamable COPYs are sink candidates; everything else just feeds
    // the running modified/used register-unit state.
    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpect SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units that DBG_VALUEs read, if this instruction
    // writes any of those units then the corresponding DBG_VALUEs must sink.
    SetVector<MachineInstr *> DbgValsToSinkSet;
    SmallVector<MachineInstr *, 4> DbgValsToSink;
    for (auto &MO : MI->operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;

      SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
      for (unsigned Reg : Units)
        for (auto *MI : SeenDbgInstrs.lookup(Reg))
          DbgValsToSinkSet.insert(MI);
    }
    DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(),
                         DbgValsToSinkSet.end());

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}
1445 
1446 bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
1447   if (skipFunction(MF.getFunction()))
1448     return false;
1449 
1450   bool Changed = false;
1451   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1452   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1453 
1454   ModifiedRegUnits.init(*TRI);
1455   UsedRegUnits.init(*TRI);
1456   for (auto &BB : MF)
1457     Changed |= tryToSinkCopy(BB, MF, TRI, TII);
1458 
1459   return Changed;
1460 }
1461