1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The purpose of this pass is to employ a canonical code transformation so
11 // that code compiled with slightly different IR passes can be diffed more
12 // effectively than otherwise. This is done by renaming vregs in a given
13 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
14 // move defs closer to their use inorder to reduce diffs caused by slightly
15 // different schedules.
16 //
17 // Basic Usage:
18 //
19 // llc -o - -run-pass mir-canonicalizer example.mir
20 //
21 // Reorders instructions canonically.
22 // Renames virtual register operands canonically.
23 // Strips certain MIR artifacts (optionally).
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 #include <queue>
37 
38 using namespace llvm;
39 
40 namespace llvm {
41 extern char &MIRCanonicalizerID;
42 } // namespace llvm
43 
44 #define DEBUG_TYPE "mir-canonicalizer"
45 
46 static cl::opt<unsigned>
47 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
48                            cl::value_desc("N"),
49                            cl::desc("Function number to canonicalize."));
50 
51 static cl::opt<unsigned>
52 CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u),
53                              cl::value_desc("N"),
54                              cl::desc("BasicBlock number to canonicalize."));
55 
56 namespace {
57 
58 class MIRCanonicalizer : public MachineFunctionPass {
59 public:
60   static char ID;
61   MIRCanonicalizer() : MachineFunctionPass(ID) {}
62 
63   StringRef getPassName() const override {
64     return "Rename register operands in a canonical ordering.";
65   }
66 
67   void getAnalysisUsage(AnalysisUsage &AU) const override {
68     AU.setPreservesCFG();
69     MachineFunctionPass::getAnalysisUsage(AU);
70   }
71 
72   bool runOnMachineFunction(MachineFunction &MF) override;
73 };
74 
75 } // end anonymous namespace
76 
77 enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
78 class TypedVReg {
79   VRType type;
80   unsigned reg;
81 
82 public:
83   TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {}
84   TypedVReg(VRType type) : type(type), reg(~0U) {
85     assert(type != RSE_Reg && "Expected a non-register type.");
86   }
87 
88   bool isReg()        const { return type == RSE_Reg;          }
89   bool isFrameIndex() const { return type == RSE_FrameIndex;   }
90   bool isCandidate()  const { return type == RSE_NewCandidate; }
91 
92   VRType getType() const { return type; }
93   unsigned getReg() const {
94     assert(this->isReg() && "Expected a virtual or physical register.");
95     return reg;
96   }
97 };
98 
99 char MIRCanonicalizer::ID;
100 
101 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
102 
103 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
104                       "Rename Register Operands Canonically", false, false)
105 
106 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
107                     "Rename Register Operands Canonically", false, false)
108 
109 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
110   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
111   std::vector<MachineBasicBlock *> RPOList;
112   for (auto MBB : RPOT) {
113     RPOList.push_back(MBB);
114   }
115 
116   return RPOList;
117 }
118 
119 // Set a dummy vreg. We use this vregs register class to generate throw-away
120 // vregs that are used to skip vreg numbers so that vreg numbers line up.
121 static unsigned GetDummyVReg(const MachineFunction &MF) {
122   for (auto &MBB : MF) {
123     for (auto &MI : MBB) {
124       for (auto &MO : MI.operands()) {
125         if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
126           continue;
127         return MO.getReg();
128       }
129     }
130   }
131 
132   return ~0U;
133 }
134 
135 static bool rescheduleCanonically(MachineBasicBlock *MBB) {
136 
137   bool Changed = false;
138 
139   // Calculates the distance of MI from the begining of its parent BB.
140   auto getInstrIdx = [](const MachineInstr &MI) {
141     unsigned i = 0;
142     for (auto &CurMI : *MI.getParent()) {
143       if (&CurMI == &MI)
144         return i;
145       i++;
146     }
147     return ~0U;
148   };
149 
150   // Pre-Populate vector of instructions to reschedule so that we don't
151   // clobber the iterator.
152   std::vector<MachineInstr *> Instructions;
153   for (auto &MI : *MBB) {
154     Instructions.push_back(&MI);
155   }
156 
157   for (auto *II : Instructions) {
158     if (II->getNumOperands() == 0)
159       continue;
160 
161     MachineOperand &MO = II->getOperand(0);
162     if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
163       continue;
164 
165     DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
166 
167     MachineInstr *Def = II;
168     unsigned Distance = ~0U;
169     MachineInstr *UseToBringDefCloserTo = nullptr;
170     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
171     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
172       MachineInstr *UseInst = UO.getParent();
173 
174       const unsigned DefLoc = getInstrIdx(*Def);
175       const unsigned UseLoc = getInstrIdx(*UseInst);
176       const unsigned Delta = (UseLoc - DefLoc);
177 
178       if (UseInst->getParent() != Def->getParent())
179         continue;
180       if (DefLoc >= UseLoc)
181         continue;
182 
183       if (Delta < Distance) {
184         Distance = Delta;
185         UseToBringDefCloserTo = UseInst;
186       }
187     }
188 
189     const auto BBE = MBB->instr_end();
190     MachineBasicBlock::iterator DefI = BBE;
191     MachineBasicBlock::iterator UseI = BBE;
192 
193     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
194 
195       if (DefI != BBE && UseI != BBE)
196         break;
197 
198       if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
199         continue;
200 
201       if (&*BBI == Def) {
202         DefI = BBI;
203         continue;
204       }
205 
206       if (&*BBI == UseToBringDefCloserTo) {
207         UseI = BBI;
208         continue;
209       }
210     }
211 
212     if (DefI == BBE || UseI == BBE)
213       continue;
214 
215     DEBUG({
216       dbgs() << "Splicing ";
217       DefI->dump();
218       dbgs() << " right before: ";
219       UseI->dump();
220     });
221 
222     Changed = true;
223     MBB->splice(UseI, MBB, DefI);
224   }
225 
226   return Changed;
227 }
228 
229 /// Here we find our candidates. What makes an interesting candidate?
230 /// An candidate for a canonicalization tree root is normally any kind of
231 /// instruction that causes side effects such as a store to memory or a copy to
232 /// a physical register or a return instruction. We use these as an expression
233 /// tree root that we walk inorder to build a canonical walk which should result
234 /// in canoncal vreg renaming.
235 static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
236   std::vector<MachineInstr *> Candidates;
237   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
238 
239   for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
240     MachineInstr *MI = &*II;
241 
242     bool DoesMISideEffect = false;
243 
244     if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
245       const unsigned Dst = MI->getOperand(0).getReg();
246       DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
247 
248       for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
249         if (DoesMISideEffect) break;
250         DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
251       }
252     }
253 
254     if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
255       continue;
256 
257     DEBUG(dbgs() << "Found Candidate:  "; MI->dump(););
258     Candidates.push_back(MI);
259   }
260 
261   return Candidates;
262 }
263 
264 void doCandidateWalk(std::vector<TypedVReg> &VRegs,
265                      std::queue <TypedVReg> &RegQueue,
266                      std::vector<MachineInstr *> &VisitedMIs,
267                      const MachineBasicBlock *MBB) {
268 
269   const MachineFunction &MF = *MBB->getParent();
270   const MachineRegisterInfo &MRI = MF.getRegInfo();
271 
272   while (!RegQueue.empty()) {
273 
274     auto TReg = RegQueue.front();
275     RegQueue.pop();
276 
277     if (TReg.isFrameIndex()) {
278       DEBUG(dbgs() << "Popping frame index.\n";);
279       VRegs.push_back(TypedVReg(RSE_FrameIndex));
280       continue;
281     }
282 
283     assert(TReg.isReg() && "Expected vreg or physreg.");
284     unsigned Reg = TReg.getReg();
285 
286     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
287       DEBUG({
288         dbgs() << "Popping vreg ";
289         MRI.def_begin(Reg)->dump();
290         dbgs() << "\n";
291       });
292 
293       if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
294             return TR.isReg() && TR.getReg() == Reg;
295           })) {
296         VRegs.push_back(TypedVReg(Reg));
297       }
298     } else {
299       DEBUG(dbgs() << "Popping physreg.\n";);
300       VRegs.push_back(TypedVReg(Reg));
301       continue;
302     }
303 
304     for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
305       MachineInstr *Def = RI->getParent();
306 
307       if (Def->getParent() != MBB)
308         continue;
309 
310       if (llvm::any_of(VisitedMIs,
311                        [&](const MachineInstr *VMI) { return Def == VMI; })) {
312         break;
313       }
314 
315       DEBUG({
316         dbgs() << "\n========================\n";
317         dbgs() << "Visited MI: ";
318         Def->dump();
319         dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
320         dbgs() << "\n========================\n";
321       });
322       VisitedMIs.push_back(Def);
323       for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
324 
325         MachineOperand &MO = Def->getOperand(I);
326         if (MO.isFI()) {
327           DEBUG(dbgs() << "Pushing frame index.\n";);
328           RegQueue.push(TypedVReg(RSE_FrameIndex));
329         }
330 
331         if (!MO.isReg())
332           continue;
333         RegQueue.push(TypedVReg(MO.getReg()));
334       }
335     }
336   }
337 }
338 
339 // TODO: Work to remove this in the future. One day when we have named vregs
340 // we should be able to form the canonical name based on some characteristic
341 // we see in that point of the expression tree (like if we were to name based
342 // on some sort of value numbering scheme).
343 static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
344                       const TargetRegisterClass *RC) {
345   const unsigned VR_GAP = (++VRegGapIndex * 1000);
346 
347   DEBUG({
348     dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
349            << VR_GAP << "\n";
350   });
351 
352   unsigned I = MRI.createVirtualRegister(RC);
353   const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
354   while (I != E) {
355     I = MRI.createVirtualRegister(RC);
356   }
357 }
358 
359 static std::map<unsigned, unsigned>
360 GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
361                  const std::vector<unsigned> &renamedInOtherBB,
362                  MachineRegisterInfo &MRI,
363                  const TargetRegisterClass *RC) {
364   std::map<unsigned, unsigned> VRegRenameMap;
365   unsigned LastRenameReg = MRI.createVirtualRegister(RC);
366   bool FirstCandidate = true;
367 
368   for (auto &vreg : VRegs) {
369     if (vreg.isFrameIndex()) {
370       // We skip one vreg for any frame index because there is a good chance
371       // (especially when comparing SelectionDAG to GlobalISel generated MIR)
372       // that in the other file we are just getting an incoming vreg that comes
373       // from a copy from a frame index. So it's safe to skip by one.
374       LastRenameReg = MRI.createVirtualRegister(RC);
375       DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
376       continue;
377     } else if (vreg.isCandidate()) {
378 
379       // After the first candidate, for every subsequent candidate, we skip mod
380       // 10 registers so that the candidates are more likely to start at the
381       // same vreg number making it more likely that the canonical walk from the
382       // candidate insruction. We don't need to skip from the first candidate of
383       // the BasicBlock because we already skip ahead several vregs for each BB.
384       while (LastRenameReg % 10) {
385         if (!FirstCandidate) break;
386         LastRenameReg = MRI.createVirtualRegister(RC);
387 
388         DEBUG({
389           dbgs() << "Skipping rename for new candidate " << LastRenameReg
390                  << "\n";
391         });
392       }
393       FirstCandidate = false;
394       continue;
395     } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
396       LastRenameReg = MRI.createVirtualRegister(RC);
397       DEBUG({
398         dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
399       });
400       continue;
401     }
402 
403     auto Reg = vreg.getReg();
404     if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
405       DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";);
406       continue;
407     }
408 
409     auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg));
410     LastRenameReg = Rename;
411 
412     if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
413       DEBUG(dbgs() << "Mapping vreg ";);
414       if (MRI.reg_begin(Reg) != MRI.reg_end()) {
415         DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
416       } else {
417         DEBUG(dbgs() << Reg;);
418       }
419       DEBUG(dbgs() << " to ";);
420       if (MRI.reg_begin(Rename) != MRI.reg_end()) {
421         DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
422       } else {
423         DEBUG(dbgs() << Rename;);
424       }
425       DEBUG(dbgs() << "\n";);
426 
427       VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
428     }
429   }
430 
431   return VRegRenameMap;
432 }
433 
434 static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB,
435                            const std::map<unsigned, unsigned> &VRegRenameMap,
436                            MachineRegisterInfo &MRI) {
437   bool Changed = false;
438   for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
439 
440     auto VReg = I->first;
441     auto Rename = I->second;
442 
443     RenamedInOtherBB.push_back(Rename);
444 
445     std::vector<MachineOperand *> RenameMOs;
446     for (auto &MO : MRI.reg_operands(VReg)) {
447       RenameMOs.push_back(&MO);
448     }
449 
450     for (auto *MO : RenameMOs) {
451       Changed = true;
452       MO->setReg(Rename);
453 
454       if (!MO->isDef())
455         MO->setIsKill(false);
456     }
457   }
458 
459   return Changed;
460 }
461 
462 static bool doDefKillClear(MachineBasicBlock *MBB) {
463   bool Changed = false;
464 
465   for (auto &MI : *MBB) {
466     for (auto &MO : MI.operands()) {
467       if (!MO.isReg())
468         continue;
469       if (!MO.isDef() && MO.isKill()) {
470         Changed = true;
471         MO.setIsKill(false);
472       }
473 
474       if (MO.isDef() && MO.isDead()) {
475         Changed = true;
476         MO.setIsDead(false);
477       }
478     }
479   }
480 
481   return Changed;
482 }
483 
484 static bool runOnBasicBlock(MachineBasicBlock *MBB,
485                             std::vector<StringRef> &bbNames,
486                             std::vector<unsigned> &renamedInOtherBB,
487                             unsigned &basicBlockNum, unsigned &VRegGapIndex) {
488 
489   if (CanonicalizeBasicBlockNumber != ~0U) {
490     if (CanonicalizeBasicBlockNumber != basicBlockNum++)
491       return false;
492     DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";);
493   }
494 
495   if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
496     DEBUG({
497       dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
498              << "\n";
499     });
500     return false;
501   }
502 
503   DEBUG({
504     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
505     dbgs() << "\n\n================================================\n\n";
506   });
507 
508   bool Changed = false;
509   MachineFunction &MF = *MBB->getParent();
510   MachineRegisterInfo &MRI = MF.getRegInfo();
511 
512   const unsigned DummyVReg = GetDummyVReg(MF);
513   const TargetRegisterClass *DummyRC =
514     (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg);
515   if (!DummyRC) return false;
516 
517   bbNames.push_back(MBB->getName());
518   DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
519 
520   DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
521   Changed |= rescheduleCanonically(MBB);
522   DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
523 
524   std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
525   std::vector<MachineInstr *> VisitedMIs;
526   std::copy(Candidates.begin(), Candidates.end(),
527             std::back_inserter(VisitedMIs));
528 
529   std::vector<TypedVReg> VRegs;
530   for (auto candidate : Candidates) {
531     VRegs.push_back(TypedVReg(RSE_NewCandidate));
532 
533     std::queue<TypedVReg> RegQueue;
534 
535     // Here we walk the vreg operands of a non-root node along our walk.
536     // The root nodes are the original candidates (stores normally).
537     // These are normally not the root nodes (except for the case of copies to
538     // physical registers).
539     for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
540       if (candidate->mayStore() || candidate->isBranch())
541         break;
542 
543       MachineOperand &MO = candidate->getOperand(i);
544       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
545         continue;
546 
547       DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
548       RegQueue.push(TypedVReg(MO.getReg()));
549     }
550 
551     // Here we walk the root candidates. We start from the 0th operand because
552     // the root is normally a store to a vreg.
553     for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
554 
555       if (!candidate->mayStore() && !candidate->isBranch())
556         break;
557 
558       MachineOperand &MO = candidate->getOperand(i);
559 
560       // TODO: Do we want to only add vregs here?
561       if (!MO.isReg() && !MO.isFI())
562         continue;
563 
564       DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
565 
566       RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
567                                   TypedVReg(RSE_FrameIndex));
568     }
569 
570     doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
571   }
572 
573   // If we have populated no vregs to rename then bail.
574   // The rest of this function does the vreg remaping.
575   if (VRegs.size() == 0)
576     return Changed;
577 
578   // Skip some vregs, so we can recon where we'll land next.
579   SkipVRegs(VRegGapIndex, MRI, DummyRC);
580 
581   auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
582   Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
583   Changed |= doDefKillClear(MBB);
584 
585   DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
586   DEBUG(dbgs() << "\n\n================================================\n\n");
587   return Changed;
588 }
589 
590 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
591 
592   static unsigned functionNum = 0;
593   if (CanonicalizeFunctionNumber != ~0U) {
594     if (CanonicalizeFunctionNumber != functionNum++)
595       return false;
596     DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
597   }
598 
599   // we need a valid vreg to create a vreg type for skipping all those
600   // stray vreg numbers so reach alignment/canonical vreg values.
601   std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
602 
603   DEBUG(
604     dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
605     dbgs() << "\n\n================================================\n\n";
606     dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
607     for (auto MBB : RPOList) {
608       dbgs() << MBB->getName() << "\n";
609     }
610     dbgs() << "\n\n================================================\n\n";
611   );
612 
613   std::vector<StringRef> BBNames;
614   std::vector<unsigned> RenamedInOtherBB;
615 
616   unsigned GapIdx = 0;
617   unsigned BBNum = 0;
618 
619   bool Changed = false;
620 
621   for (auto MBB : RPOList)
622     Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
623 
624   return Changed;
625 }
626 
627