1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The purpose of this pass is to employ a canonical code transformation so 10 // that code compiled with slightly different IR passes can be diffed more 11 // effectively than otherwise. This is done by renaming vregs in a given 12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 13 // move defs closer to their use inorder to reduce diffs caused by slightly 14 // different schedules. 15 // 16 // Basic Usage: 17 // 18 // llc -o - -run-pass mir-canonicalizer example.mir 19 // 20 // Reorders instructions canonically. 21 // Renames virtual register operands canonically. 22 // Strips certain MIR artifacts (optionally). 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/InitializePasses.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <queue> 38 39 using namespace llvm; 40 41 namespace llvm { 42 extern char &MIRCanonicalizerID; 43 } // namespace llvm 44 45 #define DEBUG_TYPE "mir-canonicalizer" 46 47 static cl::opt<unsigned> 48 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 49 cl::value_desc("N"), 50 cl::desc("Function number to canonicalize.")); 51 52 static cl::opt<unsigned> CanonicalizeBasicBlockNumber( 53 "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), 54 cl::desc("BasicBlock number to canonicalize.")); 55 56 namespace { 57 58 class MIRCanonicalizer : public MachineFunctionPass { 59 public: 60 static char ID; 61 MIRCanonicalizer() : MachineFunctionPass(ID) {} 62 63 StringRef getPassName() const override { 64 return "Rename register operands in a canonical ordering."; 65 } 66 67 void getAnalysisUsage(AnalysisUsage &AU) const override { 68 AU.setPreservesCFG(); 69 MachineFunctionPass::getAnalysisUsage(AU); 70 } 71 72 bool runOnMachineFunction(MachineFunction &MF) override; 73 }; 74 75 } // end anonymous namespace 76 77 char MIRCanonicalizer::ID; 78 79 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 80 81 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 82 "Rename Register Operands Canonically", false, false) 83 84 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 85 "Rename Register Operands Canonically", false, false) 86 87 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 88 if (MF.empty()) 89 return {}; 90 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 91 std::vector<MachineBasicBlock *> RPOList; 92 for (auto MBB : RPOT) { 93 RPOList.push_back(MBB); 94 } 95 96 return RPOList; 97 } 98 99 static bool 100 rescheduleLexographically(std::vector<MachineInstr *> instructions, 101 MachineBasicBlock *MBB, 102 std::function<MachineBasicBlock::iterator()> getPos) { 103 104 bool Changed = false; 105 using StringInstrPair = std::pair<std::string, MachineInstr *>; 106 std::vector<StringInstrPair> StringInstrMap; 107 108 for (auto *II : instructions) { 109 std::string S; 110 raw_string_ostream OS(S); 111 II->print(OS); 112 OS.flush(); 113 114 // Trim the assignment, or start from the begining in the case of a store. 115 const size_t i = S.find("="); 116 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 117 } 118 119 llvm::sort(StringInstrMap, 120 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 121 return (a.first < b.first); 122 }); 123 124 for (auto &II : StringInstrMap) { 125 126 LLVM_DEBUG({ 127 dbgs() << "Splicing "; 128 II.second->dump(); 129 dbgs() << " right before: "; 130 getPos()->dump(); 131 }); 132 133 Changed = true; 134 MBB->splice(getPos(), MBB, II.second); 135 } 136 137 return Changed; 138 } 139 140 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 141 MachineBasicBlock *MBB) { 142 143 bool Changed = false; 144 145 // Calculates the distance of MI from the begining of its parent BB. 146 auto getInstrIdx = [](const MachineInstr &MI) { 147 unsigned i = 0; 148 for (auto &CurMI : *MI.getParent()) { 149 if (&CurMI == &MI) 150 return i; 151 i++; 152 } 153 return ~0U; 154 }; 155 156 // Pre-Populate vector of instructions to reschedule so that we don't 157 // clobber the iterator. 158 std::vector<MachineInstr *> Instructions; 159 for (auto &MI : *MBB) { 160 Instructions.push_back(&MI); 161 } 162 163 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 164 std::map<unsigned, MachineInstr *> MultiUserLookup; 165 unsigned UseToBringDefCloserToCount = 0; 166 std::vector<MachineInstr *> PseudoIdempotentInstructions; 167 std::vector<unsigned> PhysRegDefs; 168 for (auto *II : Instructions) { 169 for (unsigned i = 1; i < II->getNumOperands(); i++) { 170 MachineOperand &MO = II->getOperand(i); 171 if (!MO.isReg()) 172 continue; 173 174 if (Register::isVirtualRegister(MO.getReg())) 175 continue; 176 177 if (!MO.isDef()) 178 continue; 179 180 PhysRegDefs.push_back(MO.getReg()); 181 } 182 } 183 184 for (auto *II : Instructions) { 185 if (II->getNumOperands() == 0) 186 continue; 187 if (II->mayLoadOrStore()) 188 continue; 189 190 MachineOperand &MO = II->getOperand(0); 191 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 192 continue; 193 if (!MO.isDef()) 194 continue; 195 196 bool IsPseudoIdempotent = true; 197 for (unsigned i = 1; i < II->getNumOperands(); i++) { 198 199 if (II->getOperand(i).isImm()) { 200 continue; 201 } 202 203 if (II->getOperand(i).isReg()) { 204 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 205 if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == 206 PhysRegDefs.end()) { 207 continue; 208 } 209 } 210 211 IsPseudoIdempotent = false; 212 break; 213 } 214 215 if (IsPseudoIdempotent) { 216 PseudoIdempotentInstructions.push_back(II); 217 continue; 218 } 219 220 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 221 222 MachineInstr *Def = II; 223 unsigned Distance = ~0U; 224 MachineInstr *UseToBringDefCloserTo = nullptr; 225 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 226 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 227 MachineInstr *UseInst = UO.getParent(); 228 229 const unsigned DefLoc = getInstrIdx(*Def); 230 const unsigned UseLoc = getInstrIdx(*UseInst); 231 const unsigned Delta = (UseLoc - DefLoc); 232 233 if (UseInst->getParent() != Def->getParent()) 234 continue; 235 if (DefLoc >= UseLoc) 236 continue; 237 238 if (Delta < Distance) { 239 Distance = Delta; 240 UseToBringDefCloserTo = UseInst; 241 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 242 } 243 } 244 245 const auto BBE = MBB->instr_end(); 246 MachineBasicBlock::iterator DefI = BBE; 247 MachineBasicBlock::iterator UseI = BBE; 248 249 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 250 251 if (DefI != BBE && UseI != BBE) 252 break; 253 254 if (&*BBI == Def) { 255 DefI = BBI; 256 continue; 257 } 258 259 if (&*BBI == UseToBringDefCloserTo) { 260 UseI = BBI; 261 continue; 262 } 263 } 264 265 if (DefI == BBE || UseI == BBE) 266 continue; 267 268 LLVM_DEBUG({ 269 dbgs() << "Splicing "; 270 DefI->dump(); 271 dbgs() << " right before: "; 272 UseI->dump(); 273 }); 274 275 MultiUsers[UseToBringDefCloserTo].push_back(Def); 276 Changed = true; 277 MBB->splice(UseI, MBB, DefI); 278 } 279 280 // Sort the defs for users of multiple defs lexographically. 281 for (const auto &E : MultiUserLookup) { 282 283 auto UseI = 284 std::find_if(MBB->instr_begin(), MBB->instr_end(), 285 [&](MachineInstr &MI) -> bool { return &MI == E.second; }); 286 287 if (UseI == MBB->instr_end()) 288 continue; 289 290 LLVM_DEBUG( 291 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 292 Changed |= rescheduleLexographically( 293 MultiUsers[E.second], MBB, 294 [&]() -> MachineBasicBlock::iterator { return UseI; }); 295 } 296 297 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 298 LLVM_DEBUG( 299 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 300 Changed |= rescheduleLexographically( 301 PseudoIdempotentInstructions, MBB, 302 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 303 304 return Changed; 305 } 306 307 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 308 bool Changed = false; 309 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 310 311 std::vector<MachineInstr *> Copies; 312 for (MachineInstr &MI : MBB->instrs()) { 313 if (MI.isCopy()) 314 Copies.push_back(&MI); 315 } 316 317 for (MachineInstr *MI : Copies) { 318 319 if (!MI->getOperand(0).isReg()) 320 continue; 321 if (!MI->getOperand(1).isReg()) 322 continue; 323 324 const Register Dst = MI->getOperand(0).getReg(); 325 const Register Src = MI->getOperand(1).getReg(); 326 327 if (!Register::isVirtualRegister(Dst)) 328 continue; 329 if (!Register::isVirtualRegister(Src)) 330 continue; 331 // Not folding COPY instructions if regbankselect has not set the RCs. 332 // Why are we only considering Register Classes? Because the verifier 333 // sometimes gets upset if the register classes don't match even if the 334 // types do. A future patch might add COPY folding for matching types in 335 // pre-registerbankselect code. 336 if (!MRI.getRegClassOrNull(Dst)) 337 continue; 338 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 339 continue; 340 341 std::vector<MachineOperand *> Uses; 342 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) 343 Uses.push_back(&*UI); 344 for (auto *MO : Uses) 345 MO->setReg(Src); 346 347 Changed = true; 348 MI->eraseFromParent(); 349 } 350 351 return Changed; 352 } 353 354 static bool doDefKillClear(MachineBasicBlock *MBB) { 355 bool Changed = false; 356 357 for (auto &MI : *MBB) { 358 for (auto &MO : MI.operands()) { 359 if (!MO.isReg()) 360 continue; 361 if (!MO.isDef() && MO.isKill()) { 362 Changed = true; 363 MO.setIsKill(false); 364 } 365 366 if (MO.isDef() && MO.isDead()) { 367 Changed = true; 368 MO.setIsDead(false); 369 } 370 } 371 } 372 373 return Changed; 374 } 375 376 static bool runOnBasicBlock(MachineBasicBlock *MBB, 377 std::vector<StringRef> &bbNames, 378 unsigned &basicBlockNum, VRegRenamer &Renamer) { 379 380 if (CanonicalizeBasicBlockNumber != ~0U) { 381 if (CanonicalizeBasicBlockNumber != basicBlockNum++) 382 return false; 383 LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() 384 << "\n";); 385 } 386 387 if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { 388 LLVM_DEBUG({ 389 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() 390 << "\n"; 391 }); 392 return false; 393 } 394 395 LLVM_DEBUG({ 396 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 397 dbgs() << "\n\n================================================\n\n"; 398 }); 399 400 bool Changed = false; 401 402 bbNames.push_back(MBB->getName()); 403 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 404 405 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 406 MBB->dump();); 407 Changed |= propagateLocalCopies(MBB); 408 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 409 410 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 411 unsigned IdempotentInstCount = 0; 412 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 413 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 414 415 Changed |= Renamer.renameVRegs(MBB); 416 417 Changed |= doDefKillClear(MBB); 418 419 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 420 dbgs() << "\n";); 421 LLVM_DEBUG( 422 dbgs() << "\n\n================================================\n\n"); 423 return Changed; 424 } 425 426 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 427 428 static unsigned functionNum = 0; 429 if (CanonicalizeFunctionNumber != ~0U) { 430 if (CanonicalizeFunctionNumber != functionNum++) 431 return false; 432 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 433 << "\n";); 434 } 435 436 // we need a valid vreg to create a vreg type for skipping all those 437 // stray vreg numbers so reach alignment/canonical vreg values. 438 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 439 440 LLVM_DEBUG( 441 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 442 dbgs() << "\n\n================================================\n\n"; 443 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 444 for (auto MBB 445 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 446 << "\n\n================================================\n\n";); 447 448 std::vector<StringRef> BBNames; 449 450 unsigned BBNum = 0; 451 452 bool Changed = false; 453 454 MachineRegisterInfo &MRI = MF.getRegInfo(); 455 VRegRenamer Renamer(MRI); 456 for (auto MBB : RPOList) 457 Changed |= runOnBasicBlock(MBB, BBNames, BBNum, Renamer); 458 459 return Changed; 460 } 461