1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/Support/raw_ostream.h"
34 
35 #include <queue>
36 
37 using namespace llvm;
38 
namespace llvm {
// Externally visible handle to the pass identifier. It is bound to
// MIRCanonicalizer::ID further down in this file.
extern char &MIRCanonicalizerID;
} // namespace llvm

// Tag used by the LLVM_DEBUG output emitted throughout this file.
#define DEBUG_TYPE "mir-canonicalizer"

// -canon-nth-function=N: restrict canonicalization to a single function.
// The default (~0u) disables the filter. When set, runOnMachineFunction keeps
// a running count of the functions it is invoked on (starting at 0) and only
// processes the one whose count equals N.
static cl::opt<unsigned>
    CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
                               cl::value_desc("N"),
                               cl::desc("Function number to canonicalize."));

// -canon-nth-basicblock=N: restrict canonicalization to a single basic block.
// The default (~0u) disables the filter. When set, runOnBasicBlock only
// processes the block whose position (starting at 0) in the per-function
// visiting order equals N; the counter is restarted for every function.
static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
    "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
    cl::desc("BasicBlock number to canonicalize."));
53 
54 namespace {
55 
56 class MIRCanonicalizer : public MachineFunctionPass {
57 public:
58   static char ID;
59   MIRCanonicalizer() : MachineFunctionPass(ID) {}
60 
61   StringRef getPassName() const override {
62     return "Rename register operands in a canonical ordering.";
63   }
64 
65   void getAnalysisUsage(AnalysisUsage &AU) const override {
66     AU.setPreservesCFG();
67     MachineFunctionPass::getAnalysisUsage(AU);
68   }
69 
70   bool runOnMachineFunction(MachineFunction &MF) override;
71 };
72 
73 } // end anonymous namespace
74 
75 char MIRCanonicalizer::ID;
76 
77 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
78 
79 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
80                       "Rename Register Operands Canonically", false, false)
81 
82 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
83                     "Rename Register Operands Canonically", false, false)
84 
85 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
86   if (MF.empty())
87     return {};
88   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
89   std::vector<MachineBasicBlock *> RPOList;
90   for (auto MBB : RPOT) {
91     RPOList.push_back(MBB);
92   }
93 
94   return RPOList;
95 }
96 
97 static bool
98 rescheduleLexographically(std::vector<MachineInstr *> instructions,
99                           MachineBasicBlock *MBB,
100                           std::function<MachineBasicBlock::iterator()> getPos) {
101 
102   bool Changed = false;
103   using StringInstrPair = std::pair<std::string, MachineInstr *>;
104   std::vector<StringInstrPair> StringInstrMap;
105 
106   for (auto *II : instructions) {
107     std::string S;
108     raw_string_ostream OS(S);
109     II->print(OS);
110     OS.flush();
111 
112     // Trim the assignment, or start from the begining in the case of a store.
113     const size_t i = S.find("=");
114     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
115   }
116 
117   llvm::sort(StringInstrMap,
118              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
119                return (a.first < b.first);
120              });
121 
122   for (auto &II : StringInstrMap) {
123 
124     LLVM_DEBUG({
125       dbgs() << "Splicing ";
126       II.second->dump();
127       dbgs() << " right before: ";
128       getPos()->dump();
129     });
130 
131     Changed = true;
132     MBB->splice(getPos(), MBB, II.second);
133   }
134 
135   return Changed;
136 }
137 
138 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
139                                   MachineBasicBlock *MBB) {
140 
141   bool Changed = false;
142 
143   // Calculates the distance of MI from the begining of its parent BB.
144   auto getInstrIdx = [](const MachineInstr &MI) {
145     unsigned i = 0;
146     for (auto &CurMI : *MI.getParent()) {
147       if (&CurMI == &MI)
148         return i;
149       i++;
150     }
151     return ~0U;
152   };
153 
154   // Pre-Populate vector of instructions to reschedule so that we don't
155   // clobber the iterator.
156   std::vector<MachineInstr *> Instructions;
157   for (auto &MI : *MBB) {
158     Instructions.push_back(&MI);
159   }
160 
161   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
162   std::map<unsigned, MachineInstr *> MultiUserLookup;
163   unsigned UseToBringDefCloserToCount = 0;
164   std::vector<MachineInstr *> PseudoIdempotentInstructions;
165   std::vector<unsigned> PhysRegDefs;
166   for (auto *II : Instructions) {
167     for (unsigned i = 1; i < II->getNumOperands(); i++) {
168       MachineOperand &MO = II->getOperand(i);
169       if (!MO.isReg())
170         continue;
171 
172       if (Register::isVirtualRegister(MO.getReg()))
173         continue;
174 
175       if (!MO.isDef())
176         continue;
177 
178       PhysRegDefs.push_back(MO.getReg());
179     }
180   }
181 
182   for (auto *II : Instructions) {
183     if (II->getNumOperands() == 0)
184       continue;
185     if (II->mayLoadOrStore())
186       continue;
187 
188     MachineOperand &MO = II->getOperand(0);
189     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
190       continue;
191     if (!MO.isDef())
192       continue;
193 
194     bool IsPseudoIdempotent = true;
195     for (unsigned i = 1; i < II->getNumOperands(); i++) {
196 
197       if (II->getOperand(i).isImm()) {
198         continue;
199       }
200 
201       if (II->getOperand(i).isReg()) {
202         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
203           if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
204               PhysRegDefs.end()) {
205             continue;
206           }
207       }
208 
209       IsPseudoIdempotent = false;
210       break;
211     }
212 
213     if (IsPseudoIdempotent) {
214       PseudoIdempotentInstructions.push_back(II);
215       continue;
216     }
217 
218     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
219 
220     MachineInstr *Def = II;
221     unsigned Distance = ~0U;
222     MachineInstr *UseToBringDefCloserTo = nullptr;
223     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
224     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
225       MachineInstr *UseInst = UO.getParent();
226 
227       const unsigned DefLoc = getInstrIdx(*Def);
228       const unsigned UseLoc = getInstrIdx(*UseInst);
229       const unsigned Delta = (UseLoc - DefLoc);
230 
231       if (UseInst->getParent() != Def->getParent())
232         continue;
233       if (DefLoc >= UseLoc)
234         continue;
235 
236       if (Delta < Distance) {
237         Distance = Delta;
238         UseToBringDefCloserTo = UseInst;
239         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
240       }
241     }
242 
243     const auto BBE = MBB->instr_end();
244     MachineBasicBlock::iterator DefI = BBE;
245     MachineBasicBlock::iterator UseI = BBE;
246 
247     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
248 
249       if (DefI != BBE && UseI != BBE)
250         break;
251 
252       if (&*BBI == Def) {
253         DefI = BBI;
254         continue;
255       }
256 
257       if (&*BBI == UseToBringDefCloserTo) {
258         UseI = BBI;
259         continue;
260       }
261     }
262 
263     if (DefI == BBE || UseI == BBE)
264       continue;
265 
266     LLVM_DEBUG({
267       dbgs() << "Splicing ";
268       DefI->dump();
269       dbgs() << " right before: ";
270       UseI->dump();
271     });
272 
273     MultiUsers[UseToBringDefCloserTo].push_back(Def);
274     Changed = true;
275     MBB->splice(UseI, MBB, DefI);
276   }
277 
278   // Sort the defs for users of multiple defs lexographically.
279   for (const auto &E : MultiUserLookup) {
280 
281     auto UseI =
282         std::find_if(MBB->instr_begin(), MBB->instr_end(),
283                      [&](MachineInstr &MI) -> bool { return &MI == E.second; });
284 
285     if (UseI == MBB->instr_end())
286       continue;
287 
288     LLVM_DEBUG(
289         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
290     Changed |= rescheduleLexographically(
291         MultiUsers[E.second], MBB,
292         [&]() -> MachineBasicBlock::iterator { return UseI; });
293   }
294 
295   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
296   LLVM_DEBUG(
297       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
298   Changed |= rescheduleLexographically(
299       PseudoIdempotentInstructions, MBB,
300       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
301 
302   return Changed;
303 }
304 
305 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
306   bool Changed = false;
307   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
308 
309   std::vector<MachineInstr *> Copies;
310   for (MachineInstr &MI : MBB->instrs()) {
311     if (MI.isCopy())
312       Copies.push_back(&MI);
313   }
314 
315   for (MachineInstr *MI : Copies) {
316 
317     if (!MI->getOperand(0).isReg())
318       continue;
319     if (!MI->getOperand(1).isReg())
320       continue;
321 
322     const Register Dst = MI->getOperand(0).getReg();
323     const Register Src = MI->getOperand(1).getReg();
324 
325     if (!Register::isVirtualRegister(Dst))
326       continue;
327     if (!Register::isVirtualRegister(Src))
328       continue;
329     // Not folding COPY instructions if regbankselect has not set the RCs.
330     // Why are we only considering Register Classes? Because the verifier
331     // sometimes gets upset if the register classes don't match even if the
332     // types do. A future patch might add COPY folding for matching types in
333     // pre-registerbankselect code.
334     if (!MRI.getRegClassOrNull(Dst))
335       continue;
336     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
337       continue;
338 
339     std::vector<MachineOperand *> Uses;
340     for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
341       Uses.push_back(&*UI);
342     for (auto *MO : Uses)
343       MO->setReg(Src);
344 
345     Changed = true;
346     MI->eraseFromParent();
347   }
348 
349   return Changed;
350 }
351 
352 static bool doDefKillClear(MachineBasicBlock *MBB) {
353   bool Changed = false;
354 
355   for (auto &MI : *MBB) {
356     for (auto &MO : MI.operands()) {
357       if (!MO.isReg())
358         continue;
359       if (!MO.isDef() && MO.isKill()) {
360         Changed = true;
361         MO.setIsKill(false);
362       }
363 
364       if (MO.isDef() && MO.isDead()) {
365         Changed = true;
366         MO.setIsDead(false);
367       }
368     }
369   }
370 
371   return Changed;
372 }
373 
374 static bool runOnBasicBlock(MachineBasicBlock *MBB,
375                             std::vector<StringRef> &bbNames,
376                             unsigned &basicBlockNum, NamedVRegCursor &NVC) {
377 
378   if (CanonicalizeBasicBlockNumber != ~0U) {
379     if (CanonicalizeBasicBlockNumber != basicBlockNum++)
380       return false;
381     LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
382                       << "\n";);
383   }
384 
385   if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
386     LLVM_DEBUG({
387       dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
388              << "\n";
389     });
390     return false;
391   }
392 
393   LLVM_DEBUG({
394     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
395     dbgs() << "\n\n================================================\n\n";
396   });
397 
398   bool Changed = false;
399   MachineFunction &MF = *MBB->getParent();
400   MachineRegisterInfo &MRI = MF.getRegInfo();
401 
402   bbNames.push_back(MBB->getName());
403   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
404 
405   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
406              MBB->dump(););
407   Changed |= propagateLocalCopies(MBB);
408   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
409 
410   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
411   unsigned IdempotentInstCount = 0;
412   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
413   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
414 
415   Changed |= NVC.renameVRegs(MBB);
416 
417   // Here we renumber the def vregs for the idempotent instructions from the top
418   // of the MachineBasicBlock so that they are named in the order that we sorted
419   // them alphabetically. Eventually we wont need SkipVRegs because we will use
420   // named vregs instead.
421   if (IdempotentInstCount)
422     NVC.skipVRegs();
423 
424   auto MII = MBB->begin();
425   for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
426     MachineInstr &MI = *MII++;
427     Changed = true;
428     Register vRegToRename = MI.getOperand(0).getReg();
429     auto Rename = NVC.createVirtualRegister(vRegToRename);
430 
431     std::vector<MachineOperand *> RenameMOs;
432     for (auto &MO : MRI.reg_operands(vRegToRename)) {
433       RenameMOs.push_back(&MO);
434     }
435 
436     for (auto *MO : RenameMOs) {
437       MO->setReg(Rename);
438     }
439   }
440 
441   Changed |= doDefKillClear(MBB);
442 
443   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
444              dbgs() << "\n";);
445   LLVM_DEBUG(
446       dbgs() << "\n\n================================================\n\n");
447   return Changed;
448 }
449 
450 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
451 
452   static unsigned functionNum = 0;
453   if (CanonicalizeFunctionNumber != ~0U) {
454     if (CanonicalizeFunctionNumber != functionNum++)
455       return false;
456     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
457                       << "\n";);
458   }
459 
460   // we need a valid vreg to create a vreg type for skipping all those
461   // stray vreg numbers so reach alignment/canonical vreg values.
462   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
463 
464   LLVM_DEBUG(
465       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
466       dbgs() << "\n\n================================================\n\n";
467       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
468       for (auto MBB
469            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
470       << "\n\n================================================\n\n";);
471 
472   std::vector<StringRef> BBNames;
473 
474   unsigned BBNum = 0;
475 
476   bool Changed = false;
477 
478   MachineRegisterInfo &MRI = MF.getRegInfo();
479   NamedVRegCursor NVC(MRI);
480   for (auto MBB : RPOList)
481     Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
482 
483   return Changed;
484 }
485