1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 #include <queue>
38 
39 using namespace llvm;
40 
41 namespace llvm {
42 extern char &MIRCanonicalizerID;
43 } // namespace llvm
44 
45 #define DEBUG_TYPE "mir-canonicalizer"
46 
47 static cl::opt<unsigned>
48     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
49                                cl::value_desc("N"),
50                                cl::desc("Function number to canonicalize."));
51 
52 static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
53     "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
54     cl::desc("BasicBlock number to canonicalize."));
55 
56 namespace {
57 
58 class MIRCanonicalizer : public MachineFunctionPass {
59 public:
60   static char ID;
61   MIRCanonicalizer() : MachineFunctionPass(ID) {}
62 
63   StringRef getPassName() const override {
64     return "Rename register operands in a canonical ordering.";
65   }
66 
67   void getAnalysisUsage(AnalysisUsage &AU) const override {
68     AU.setPreservesCFG();
69     MachineFunctionPass::getAnalysisUsage(AU);
70   }
71 
72   bool runOnMachineFunction(MachineFunction &MF) override;
73 };
74 
75 } // end anonymous namespace
76 
77 char MIRCanonicalizer::ID;
78 
79 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
80 
81 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
82                       "Rename Register Operands Canonically", false, false)
83 
84 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
85                     "Rename Register Operands Canonically", false, false)
86 
87 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
88   if (MF.empty())
89     return {};
90   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
91   std::vector<MachineBasicBlock *> RPOList;
92   for (auto MBB : RPOT) {
93     RPOList.push_back(MBB);
94   }
95 
96   return RPOList;
97 }
98 
99 static bool
100 rescheduleLexographically(std::vector<MachineInstr *> instructions,
101                           MachineBasicBlock *MBB,
102                           std::function<MachineBasicBlock::iterator()> getPos) {
103 
104   bool Changed = false;
105   using StringInstrPair = std::pair<std::string, MachineInstr *>;
106   std::vector<StringInstrPair> StringInstrMap;
107 
108   for (auto *II : instructions) {
109     std::string S;
110     raw_string_ostream OS(S);
111     II->print(OS);
112     OS.flush();
113 
114     // Trim the assignment, or start from the begining in the case of a store.
115     const size_t i = S.find("=");
116     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
117   }
118 
119   llvm::sort(StringInstrMap,
120              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
121                return (a.first < b.first);
122              });
123 
124   for (auto &II : StringInstrMap) {
125 
126     LLVM_DEBUG({
127       dbgs() << "Splicing ";
128       II.second->dump();
129       dbgs() << " right before: ";
130       getPos()->dump();
131     });
132 
133     Changed = true;
134     MBB->splice(getPos(), MBB, II.second);
135   }
136 
137   return Changed;
138 }
139 
140 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
141                                   MachineBasicBlock *MBB) {
142 
143   bool Changed = false;
144 
145   // Calculates the distance of MI from the begining of its parent BB.
146   auto getInstrIdx = [](const MachineInstr &MI) {
147     unsigned i = 0;
148     for (auto &CurMI : *MI.getParent()) {
149       if (&CurMI == &MI)
150         return i;
151       i++;
152     }
153     return ~0U;
154   };
155 
156   // Pre-Populate vector of instructions to reschedule so that we don't
157   // clobber the iterator.
158   std::vector<MachineInstr *> Instructions;
159   for (auto &MI : *MBB) {
160     Instructions.push_back(&MI);
161   }
162 
163   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
164   std::map<unsigned, MachineInstr *> MultiUserLookup;
165   unsigned UseToBringDefCloserToCount = 0;
166   std::vector<MachineInstr *> PseudoIdempotentInstructions;
167   std::vector<unsigned> PhysRegDefs;
168   for (auto *II : Instructions) {
169     for (unsigned i = 1; i < II->getNumOperands(); i++) {
170       MachineOperand &MO = II->getOperand(i);
171       if (!MO.isReg())
172         continue;
173 
174       if (Register::isVirtualRegister(MO.getReg()))
175         continue;
176 
177       if (!MO.isDef())
178         continue;
179 
180       PhysRegDefs.push_back(MO.getReg());
181     }
182   }
183 
184   for (auto *II : Instructions) {
185     if (II->getNumOperands() == 0)
186       continue;
187     if (II->mayLoadOrStore())
188       continue;
189 
190     MachineOperand &MO = II->getOperand(0);
191     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
192       continue;
193     if (!MO.isDef())
194       continue;
195 
196     bool IsPseudoIdempotent = true;
197     for (unsigned i = 1; i < II->getNumOperands(); i++) {
198 
199       if (II->getOperand(i).isImm()) {
200         continue;
201       }
202 
203       if (II->getOperand(i).isReg()) {
204         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
205           if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
206               PhysRegDefs.end()) {
207             continue;
208           }
209       }
210 
211       IsPseudoIdempotent = false;
212       break;
213     }
214 
215     if (IsPseudoIdempotent) {
216       PseudoIdempotentInstructions.push_back(II);
217       continue;
218     }
219 
220     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
221 
222     MachineInstr *Def = II;
223     unsigned Distance = ~0U;
224     MachineInstr *UseToBringDefCloserTo = nullptr;
225     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
226     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
227       MachineInstr *UseInst = UO.getParent();
228 
229       const unsigned DefLoc = getInstrIdx(*Def);
230       const unsigned UseLoc = getInstrIdx(*UseInst);
231       const unsigned Delta = (UseLoc - DefLoc);
232 
233       if (UseInst->getParent() != Def->getParent())
234         continue;
235       if (DefLoc >= UseLoc)
236         continue;
237 
238       if (Delta < Distance) {
239         Distance = Delta;
240         UseToBringDefCloserTo = UseInst;
241         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
242       }
243     }
244 
245     const auto BBE = MBB->instr_end();
246     MachineBasicBlock::iterator DefI = BBE;
247     MachineBasicBlock::iterator UseI = BBE;
248 
249     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
250 
251       if (DefI != BBE && UseI != BBE)
252         break;
253 
254       if (&*BBI == Def) {
255         DefI = BBI;
256         continue;
257       }
258 
259       if (&*BBI == UseToBringDefCloserTo) {
260         UseI = BBI;
261         continue;
262       }
263     }
264 
265     if (DefI == BBE || UseI == BBE)
266       continue;
267 
268     LLVM_DEBUG({
269       dbgs() << "Splicing ";
270       DefI->dump();
271       dbgs() << " right before: ";
272       UseI->dump();
273     });
274 
275     MultiUsers[UseToBringDefCloserTo].push_back(Def);
276     Changed = true;
277     MBB->splice(UseI, MBB, DefI);
278   }
279 
280   // Sort the defs for users of multiple defs lexographically.
281   for (const auto &E : MultiUserLookup) {
282 
283     auto UseI =
284         std::find_if(MBB->instr_begin(), MBB->instr_end(),
285                      [&](MachineInstr &MI) -> bool { return &MI == E.second; });
286 
287     if (UseI == MBB->instr_end())
288       continue;
289 
290     LLVM_DEBUG(
291         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
292     Changed |= rescheduleLexographically(
293         MultiUsers[E.second], MBB,
294         [&]() -> MachineBasicBlock::iterator { return UseI; });
295   }
296 
297   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
298   LLVM_DEBUG(
299       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
300   Changed |= rescheduleLexographically(
301       PseudoIdempotentInstructions, MBB,
302       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
303 
304   return Changed;
305 }
306 
307 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
308   bool Changed = false;
309   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
310 
311   std::vector<MachineInstr *> Copies;
312   for (MachineInstr &MI : MBB->instrs()) {
313     if (MI.isCopy())
314       Copies.push_back(&MI);
315   }
316 
317   for (MachineInstr *MI : Copies) {
318 
319     if (!MI->getOperand(0).isReg())
320       continue;
321     if (!MI->getOperand(1).isReg())
322       continue;
323 
324     const Register Dst = MI->getOperand(0).getReg();
325     const Register Src = MI->getOperand(1).getReg();
326 
327     if (!Register::isVirtualRegister(Dst))
328       continue;
329     if (!Register::isVirtualRegister(Src))
330       continue;
331     // Not folding COPY instructions if regbankselect has not set the RCs.
332     // Why are we only considering Register Classes? Because the verifier
333     // sometimes gets upset if the register classes don't match even if the
334     // types do. A future patch might add COPY folding for matching types in
335     // pre-registerbankselect code.
336     if (!MRI.getRegClassOrNull(Dst))
337       continue;
338     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
339       continue;
340 
341     std::vector<MachineOperand *> Uses;
342     for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
343       Uses.push_back(&*UI);
344     for (auto *MO : Uses)
345       MO->setReg(Src);
346 
347     Changed = true;
348     MI->eraseFromParent();
349   }
350 
351   return Changed;
352 }
353 
354 static bool doDefKillClear(MachineBasicBlock *MBB) {
355   bool Changed = false;
356 
357   for (auto &MI : *MBB) {
358     for (auto &MO : MI.operands()) {
359       if (!MO.isReg())
360         continue;
361       if (!MO.isDef() && MO.isKill()) {
362         Changed = true;
363         MO.setIsKill(false);
364       }
365 
366       if (MO.isDef() && MO.isDead()) {
367         Changed = true;
368         MO.setIsDead(false);
369       }
370     }
371   }
372 
373   return Changed;
374 }
375 
376 static bool runOnBasicBlock(MachineBasicBlock *MBB,
377                             std::vector<StringRef> &bbNames,
378                             unsigned &basicBlockNum, VRegRenamer &Renamer) {
379 
380   if (CanonicalizeBasicBlockNumber != ~0U) {
381     if (CanonicalizeBasicBlockNumber != basicBlockNum++)
382       return false;
383     LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
384                       << "\n";);
385   }
386 
387   if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
388     LLVM_DEBUG({
389       dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
390              << "\n";
391     });
392     return false;
393   }
394 
395   LLVM_DEBUG({
396     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
397     dbgs() << "\n\n================================================\n\n";
398   });
399 
400   bool Changed = false;
401 
402   bbNames.push_back(MBB->getName());
403   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
404 
405   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
406              MBB->dump(););
407   Changed |= propagateLocalCopies(MBB);
408   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
409 
410   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
411   unsigned IdempotentInstCount = 0;
412   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
413   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
414 
415   Changed |= Renamer.renameVRegs(MBB);
416 
417   Changed |= doDefKillClear(MBB);
418 
419   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
420              dbgs() << "\n";);
421   LLVM_DEBUG(
422       dbgs() << "\n\n================================================\n\n");
423   return Changed;
424 }
425 
426 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
427 
428   static unsigned functionNum = 0;
429   if (CanonicalizeFunctionNumber != ~0U) {
430     if (CanonicalizeFunctionNumber != functionNum++)
431       return false;
432     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
433                       << "\n";);
434   }
435 
436   // we need a valid vreg to create a vreg type for skipping all those
437   // stray vreg numbers so reach alignment/canonical vreg values.
438   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
439 
440   LLVM_DEBUG(
441       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
442       dbgs() << "\n\n================================================\n\n";
443       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
444       for (auto MBB
445            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
446       << "\n\n================================================\n\n";);
447 
448   std::vector<StringRef> BBNames;
449 
450   unsigned BBNum = 0;
451 
452   bool Changed = false;
453 
454   MachineRegisterInfo &MRI = MF.getRegInfo();
455   VRegRenamer Renamer(MRI);
456   for (auto MBB : RPOList)
457     Changed |= runOnBasicBlock(MBB, BBNames, BBNum, Renamer);
458 
459   return Changed;
460 }
461