1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "mir-canonicalizer"
39 
40 static cl::opt<unsigned>
41     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42                                cl::value_desc("N"),
43                                cl::desc("Function number to canonicalize."));
44 
45 namespace {
46 
47 class MIRCanonicalizer : public MachineFunctionPass {
48 public:
49   static char ID;
50   MIRCanonicalizer() : MachineFunctionPass(ID) {}
51 
52   StringRef getPassName() const override {
53     return "Rename register operands in a canonical ordering.";
54   }
55 
56   void getAnalysisUsage(AnalysisUsage &AU) const override {
57     AU.setPreservesCFG();
58     MachineFunctionPass::getAnalysisUsage(AU);
59   }
60 
61   bool runOnMachineFunction(MachineFunction &MF) override;
62 };
63 
64 } // end anonymous namespace
65 
66 char MIRCanonicalizer::ID;
67 
68 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69 
70 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71                       "Rename Register Operands Canonically", false, false)
72 
73 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
74                     "Rename Register Operands Canonically", false, false)
75 
76 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
77   if (MF.empty())
78     return {};
79   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
80   std::vector<MachineBasicBlock *> RPOList;
81   append_range(RPOList, RPOT);
82 
83   return RPOList;
84 }
85 
86 static bool
87 rescheduleLexographically(std::vector<MachineInstr *> instructions,
88                           MachineBasicBlock *MBB,
89                           std::function<MachineBasicBlock::iterator()> getPos) {
90 
91   bool Changed = false;
92   using StringInstrPair = std::pair<std::string, MachineInstr *>;
93   std::vector<StringInstrPair> StringInstrMap;
94 
95   for (auto *II : instructions) {
96     std::string S;
97     raw_string_ostream OS(S);
98     II->print(OS);
99     OS.flush();
100 
101     // Trim the assignment, or start from the beginning in the case of a store.
102     const size_t i = S.find('=');
103     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104   }
105 
106   llvm::sort(StringInstrMap,
107              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
108                return (a.first < b.first);
109              });
110 
111   for (auto &II : StringInstrMap) {
112 
113     LLVM_DEBUG({
114       dbgs() << "Splicing ";
115       II.second->dump();
116       dbgs() << " right before: ";
117       getPos()->dump();
118     });
119 
120     Changed = true;
121     MBB->splice(getPos(), MBB, II.second);
122   }
123 
124   return Changed;
125 }
126 
127 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
128                                   MachineBasicBlock *MBB) {
129 
130   bool Changed = false;
131 
132   // Calculates the distance of MI from the beginning of its parent BB.
133   auto getInstrIdx = [](const MachineInstr &MI) {
134     unsigned i = 0;
135     for (auto &CurMI : *MI.getParent()) {
136       if (&CurMI == &MI)
137         return i;
138       i++;
139     }
140     return ~0U;
141   };
142 
143   // Pre-Populate vector of instructions to reschedule so that we don't
144   // clobber the iterator.
145   std::vector<MachineInstr *> Instructions;
146   for (auto &MI : *MBB) {
147     Instructions.push_back(&MI);
148   }
149 
150   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
151   std::map<unsigned, MachineInstr *> MultiUserLookup;
152   unsigned UseToBringDefCloserToCount = 0;
153   std::vector<MachineInstr *> PseudoIdempotentInstructions;
154   std::vector<unsigned> PhysRegDefs;
155   for (auto *II : Instructions) {
156     for (unsigned i = 1; i < II->getNumOperands(); i++) {
157       MachineOperand &MO = II->getOperand(i);
158       if (!MO.isReg())
159         continue;
160 
161       if (Register::isVirtualRegister(MO.getReg()))
162         continue;
163 
164       if (!MO.isDef())
165         continue;
166 
167       PhysRegDefs.push_back(MO.getReg());
168     }
169   }
170 
171   for (auto *II : Instructions) {
172     if (II->getNumOperands() == 0)
173       continue;
174     if (II->mayLoadOrStore())
175       continue;
176 
177     MachineOperand &MO = II->getOperand(0);
178     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
179       continue;
180     if (!MO.isDef())
181       continue;
182 
183     bool IsPseudoIdempotent = true;
184     for (unsigned i = 1; i < II->getNumOperands(); i++) {
185 
186       if (II->getOperand(i).isImm()) {
187         continue;
188       }
189 
190       if (II->getOperand(i).isReg()) {
191         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
192           if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
193             continue;
194           }
195       }
196 
197       IsPseudoIdempotent = false;
198       break;
199     }
200 
201     if (IsPseudoIdempotent) {
202       PseudoIdempotentInstructions.push_back(II);
203       continue;
204     }
205 
206     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
207 
208     MachineInstr *Def = II;
209     unsigned Distance = ~0U;
210     MachineInstr *UseToBringDefCloserTo = nullptr;
211     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
212     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
213       MachineInstr *UseInst = UO.getParent();
214 
215       const unsigned DefLoc = getInstrIdx(*Def);
216       const unsigned UseLoc = getInstrIdx(*UseInst);
217       const unsigned Delta = (UseLoc - DefLoc);
218 
219       if (UseInst->getParent() != Def->getParent())
220         continue;
221       if (DefLoc >= UseLoc)
222         continue;
223 
224       if (Delta < Distance) {
225         Distance = Delta;
226         UseToBringDefCloserTo = UseInst;
227         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
228       }
229     }
230 
231     const auto BBE = MBB->instr_end();
232     MachineBasicBlock::iterator DefI = BBE;
233     MachineBasicBlock::iterator UseI = BBE;
234 
235     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
236 
237       if (DefI != BBE && UseI != BBE)
238         break;
239 
240       if (&*BBI == Def) {
241         DefI = BBI;
242         continue;
243       }
244 
245       if (&*BBI == UseToBringDefCloserTo) {
246         UseI = BBI;
247         continue;
248       }
249     }
250 
251     if (DefI == BBE || UseI == BBE)
252       continue;
253 
254     LLVM_DEBUG({
255       dbgs() << "Splicing ";
256       DefI->dump();
257       dbgs() << " right before: ";
258       UseI->dump();
259     });
260 
261     MultiUsers[UseToBringDefCloserTo].push_back(Def);
262     Changed = true;
263     MBB->splice(UseI, MBB, DefI);
264   }
265 
266   // Sort the defs for users of multiple defs lexographically.
267   for (const auto &E : MultiUserLookup) {
268 
269     auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
270       return &MI == E.second;
271     });
272 
273     if (UseI == MBB->instr_end())
274       continue;
275 
276     LLVM_DEBUG(
277         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
278     Changed |= rescheduleLexographically(
279         MultiUsers[E.second], MBB,
280         [&]() -> MachineBasicBlock::iterator { return UseI; });
281   }
282 
283   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
284   LLVM_DEBUG(
285       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
286   Changed |= rescheduleLexographically(
287       PseudoIdempotentInstructions, MBB,
288       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
289 
290   return Changed;
291 }
292 
293 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
294   bool Changed = false;
295   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
296 
297   std::vector<MachineInstr *> Copies;
298   for (MachineInstr &MI : MBB->instrs()) {
299     if (MI.isCopy())
300       Copies.push_back(&MI);
301   }
302 
303   for (MachineInstr *MI : Copies) {
304 
305     if (!MI->getOperand(0).isReg())
306       continue;
307     if (!MI->getOperand(1).isReg())
308       continue;
309 
310     const Register Dst = MI->getOperand(0).getReg();
311     const Register Src = MI->getOperand(1).getReg();
312 
313     if (!Register::isVirtualRegister(Dst))
314       continue;
315     if (!Register::isVirtualRegister(Src))
316       continue;
317     // Not folding COPY instructions if regbankselect has not set the RCs.
318     // Why are we only considering Register Classes? Because the verifier
319     // sometimes gets upset if the register classes don't match even if the
320     // types do. A future patch might add COPY folding for matching types in
321     // pre-registerbankselect code.
322     if (!MRI.getRegClassOrNull(Dst))
323       continue;
324     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
325       continue;
326 
327     std::vector<MachineOperand *> Uses;
328     for (MachineOperand &MO : MRI.use_operands(Dst))
329       Uses.push_back(&MO);
330     for (auto *MO : Uses)
331       MO->setReg(Src);
332 
333     Changed = true;
334     MI->eraseFromParent();
335   }
336 
337   return Changed;
338 }
339 
340 static bool doDefKillClear(MachineBasicBlock *MBB) {
341   bool Changed = false;
342 
343   for (auto &MI : *MBB) {
344     for (auto &MO : MI.operands()) {
345       if (!MO.isReg())
346         continue;
347       if (!MO.isDef() && MO.isKill()) {
348         Changed = true;
349         MO.setIsKill(false);
350       }
351 
352       if (MO.isDef() && MO.isDead()) {
353         Changed = true;
354         MO.setIsDead(false);
355       }
356     }
357   }
358 
359   return Changed;
360 }
361 
362 static bool runOnBasicBlock(MachineBasicBlock *MBB,
363                             unsigned BasicBlockNum, VRegRenamer &Renamer) {
364   LLVM_DEBUG({
365     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
366     dbgs() << "\n\n================================================\n\n";
367   });
368 
369   bool Changed = false;
370 
371   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
372 
373   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
374              MBB->dump(););
375   Changed |= propagateLocalCopies(MBB);
376   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
377 
378   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
379   unsigned IdempotentInstCount = 0;
380   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
381   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
382 
383   Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
384 
385   // TODO: Consider dropping this. Dropping kill defs is probably not
386   // semantically sound.
387   Changed |= doDefKillClear(MBB);
388 
389   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
390              dbgs() << "\n";);
391   LLVM_DEBUG(
392       dbgs() << "\n\n================================================\n\n");
393   return Changed;
394 }
395 
396 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
397 
398   static unsigned functionNum = 0;
399   if (CanonicalizeFunctionNumber != ~0U) {
400     if (CanonicalizeFunctionNumber != functionNum++)
401       return false;
402     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
403                       << "\n";);
404   }
405 
406   // we need a valid vreg to create a vreg type for skipping all those
407   // stray vreg numbers so reach alignment/canonical vreg values.
408   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
409 
410   LLVM_DEBUG(
411       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
412       dbgs() << "\n\n================================================\n\n";
413       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
414       for (auto MBB
415            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
416       << "\n\n================================================\n\n";);
417 
418   unsigned BBNum = 0;
419   bool Changed = false;
420   MachineRegisterInfo &MRI = MF.getRegInfo();
421   VRegRenamer Renamer(MRI);
422   for (auto MBB : RPOList)
423     Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
424 
425   return Changed;
426 }
427