1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The purpose of this pass is to employ a canonical code transformation so 10 // that code compiled with slightly different IR passes can be diffed more 11 // effectively than otherwise. This is done by renaming vregs in a given 12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 13 // move defs closer to their use inorder to reduce diffs caused by slightly 14 // different schedules. 15 // 16 // Basic Usage: 17 // 18 // llc -o - -run-pass mir-canonicalizer example.mir 19 // 20 // Reorders instructions canonically. 21 // Renames virtual register operands canonically. 22 // Strips certain MIR artifacts (optionally). 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 #include <queue> 36 37 using namespace llvm; 38 39 namespace llvm { 40 extern char &MIRCanonicalizerID; 41 } // namespace llvm 42 43 #define DEBUG_TYPE "mir-canonicalizer" 44 45 static cl::opt<unsigned> 46 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 47 cl::value_desc("N"), 48 cl::desc("Function number to canonicalize.")); 49 50 static cl::opt<unsigned> CanonicalizeBasicBlockNumber( 51 "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), 52 cl::desc("BasicBlock number to canonicalize.")); 53 54 namespace { 55 56 class MIRCanonicalizer : public MachineFunctionPass { 57 public: 58 static char ID; 59 MIRCanonicalizer() : MachineFunctionPass(ID) {} 60 61 StringRef getPassName() const override { 62 return "Rename register operands in a canonical ordering."; 63 } 64 65 void getAnalysisUsage(AnalysisUsage &AU) const override { 66 AU.setPreservesCFG(); 67 MachineFunctionPass::getAnalysisUsage(AU); 68 } 69 70 bool runOnMachineFunction(MachineFunction &MF) override; 71 }; 72 73 } // end anonymous namespace 74 75 char MIRCanonicalizer::ID; 76 77 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 78 79 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 80 "Rename Register Operands Canonically", false, false) 81 82 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 83 "Rename Register Operands Canonically", false, false) 84 85 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 86 if (MF.empty()) 87 return {}; 88 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 89 std::vector<MachineBasicBlock *> RPOList; 90 for (auto MBB : RPOT) { 91 RPOList.push_back(MBB); 92 } 93 94 return RPOList; 95 } 96 97 static bool 98 rescheduleLexographically(std::vector<MachineInstr *> instructions, 99 MachineBasicBlock *MBB, 100 std::function<MachineBasicBlock::iterator()> getPos) { 101 102 bool Changed = false; 103 using StringInstrPair = std::pair<std::string, MachineInstr *>; 104 std::vector<StringInstrPair> StringInstrMap; 105 106 for (auto *II : instructions) { 107 std::string S; 108 raw_string_ostream OS(S); 109 II->print(OS); 110 OS.flush(); 111 112 // Trim the assignment, or start from the begining in the case of a store. 113 const size_t i = S.find("="); 114 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 115 } 116 117 llvm::sort(StringInstrMap, 118 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 119 return (a.first < b.first); 120 }); 121 122 for (auto &II : StringInstrMap) { 123 124 LLVM_DEBUG({ 125 dbgs() << "Splicing "; 126 II.second->dump(); 127 dbgs() << " right before: "; 128 getPos()->dump(); 129 }); 130 131 Changed = true; 132 MBB->splice(getPos(), MBB, II.second); 133 } 134 135 return Changed; 136 } 137 138 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 139 MachineBasicBlock *MBB) { 140 141 bool Changed = false; 142 143 // Calculates the distance of MI from the begining of its parent BB. 144 auto getInstrIdx = [](const MachineInstr &MI) { 145 unsigned i = 0; 146 for (auto &CurMI : *MI.getParent()) { 147 if (&CurMI == &MI) 148 return i; 149 i++; 150 } 151 return ~0U; 152 }; 153 154 // Pre-Populate vector of instructions to reschedule so that we don't 155 // clobber the iterator. 156 std::vector<MachineInstr *> Instructions; 157 for (auto &MI : *MBB) { 158 Instructions.push_back(&MI); 159 } 160 161 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 162 std::map<unsigned, MachineInstr *> MultiUserLookup; 163 unsigned UseToBringDefCloserToCount = 0; 164 std::vector<MachineInstr *> PseudoIdempotentInstructions; 165 std::vector<unsigned> PhysRegDefs; 166 for (auto *II : Instructions) { 167 for (unsigned i = 1; i < II->getNumOperands(); i++) { 168 MachineOperand &MO = II->getOperand(i); 169 if (!MO.isReg()) 170 continue; 171 172 if (Register::isVirtualRegister(MO.getReg())) 173 continue; 174 175 if (!MO.isDef()) 176 continue; 177 178 PhysRegDefs.push_back(MO.getReg()); 179 } 180 } 181 182 for (auto *II : Instructions) { 183 if (II->getNumOperands() == 0) 184 continue; 185 if (II->mayLoadOrStore()) 186 continue; 187 188 MachineOperand &MO = II->getOperand(0); 189 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 190 continue; 191 if (!MO.isDef()) 192 continue; 193 194 bool IsPseudoIdempotent = true; 195 for (unsigned i = 1; i < II->getNumOperands(); i++) { 196 197 if (II->getOperand(i).isImm()) { 198 continue; 199 } 200 201 if (II->getOperand(i).isReg()) { 202 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 203 if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == 204 PhysRegDefs.end()) { 205 continue; 206 } 207 } 208 209 IsPseudoIdempotent = false; 210 break; 211 } 212 213 if (IsPseudoIdempotent) { 214 PseudoIdempotentInstructions.push_back(II); 215 continue; 216 } 217 218 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 219 220 MachineInstr *Def = II; 221 unsigned Distance = ~0U; 222 MachineInstr *UseToBringDefCloserTo = nullptr; 223 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 224 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 225 MachineInstr *UseInst = UO.getParent(); 226 227 const unsigned DefLoc = getInstrIdx(*Def); 228 const unsigned UseLoc = getInstrIdx(*UseInst); 229 const unsigned Delta = (UseLoc - DefLoc); 230 231 if (UseInst->getParent() != Def->getParent()) 232 continue; 233 if (DefLoc >= UseLoc) 234 continue; 235 236 if (Delta < Distance) { 237 Distance = Delta; 238 UseToBringDefCloserTo = UseInst; 239 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 240 } 241 } 242 243 const auto BBE = MBB->instr_end(); 244 MachineBasicBlock::iterator DefI = BBE; 245 MachineBasicBlock::iterator UseI = BBE; 246 247 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 248 249 if (DefI != BBE && UseI != BBE) 250 break; 251 252 if (&*BBI == Def) { 253 DefI = BBI; 254 continue; 255 } 256 257 if (&*BBI == UseToBringDefCloserTo) { 258 UseI = BBI; 259 continue; 260 } 261 } 262 263 if (DefI == BBE || UseI == BBE) 264 continue; 265 266 LLVM_DEBUG({ 267 dbgs() << "Splicing "; 268 DefI->dump(); 269 dbgs() << " right before: "; 270 UseI->dump(); 271 }); 272 273 MultiUsers[UseToBringDefCloserTo].push_back(Def); 274 Changed = true; 275 MBB->splice(UseI, MBB, DefI); 276 } 277 278 // Sort the defs for users of multiple defs lexographically. 279 for (const auto &E : MultiUserLookup) { 280 281 auto UseI = 282 std::find_if(MBB->instr_begin(), MBB->instr_end(), 283 [&](MachineInstr &MI) -> bool { return &MI == E.second; }); 284 285 if (UseI == MBB->instr_end()) 286 continue; 287 288 LLVM_DEBUG( 289 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 290 Changed |= rescheduleLexographically( 291 MultiUsers[E.second], MBB, 292 [&]() -> MachineBasicBlock::iterator { return UseI; }); 293 } 294 295 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 296 LLVM_DEBUG( 297 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 298 Changed |= rescheduleLexographically( 299 PseudoIdempotentInstructions, MBB, 300 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 301 302 return Changed; 303 } 304 305 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 306 bool Changed = false; 307 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 308 309 std::vector<MachineInstr *> Copies; 310 for (MachineInstr &MI : MBB->instrs()) { 311 if (MI.isCopy()) 312 Copies.push_back(&MI); 313 } 314 315 for (MachineInstr *MI : Copies) { 316 317 if (!MI->getOperand(0).isReg()) 318 continue; 319 if (!MI->getOperand(1).isReg()) 320 continue; 321 322 const Register Dst = MI->getOperand(0).getReg(); 323 const Register Src = MI->getOperand(1).getReg(); 324 325 if (!Register::isVirtualRegister(Dst)) 326 continue; 327 if (!Register::isVirtualRegister(Src)) 328 continue; 329 // Not folding COPY instructions if regbankselect has not set the RCs. 330 // Why are we only considering Register Classes? Because the verifier 331 // sometimes gets upset if the register classes don't match even if the 332 // types do. A future patch might add COPY folding for matching types in 333 // pre-registerbankselect code. 334 if (!MRI.getRegClassOrNull(Dst)) 335 continue; 336 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 337 continue; 338 339 std::vector<MachineOperand *> Uses; 340 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) 341 Uses.push_back(&*UI); 342 for (auto *MO : Uses) 343 MO->setReg(Src); 344 345 Changed = true; 346 MI->eraseFromParent(); 347 } 348 349 return Changed; 350 } 351 352 static bool doDefKillClear(MachineBasicBlock *MBB) { 353 bool Changed = false; 354 355 for (auto &MI : *MBB) { 356 for (auto &MO : MI.operands()) { 357 if (!MO.isReg()) 358 continue; 359 if (!MO.isDef() && MO.isKill()) { 360 Changed = true; 361 MO.setIsKill(false); 362 } 363 364 if (MO.isDef() && MO.isDead()) { 365 Changed = true; 366 MO.setIsDead(false); 367 } 368 } 369 } 370 371 return Changed; 372 } 373 374 static bool runOnBasicBlock(MachineBasicBlock *MBB, 375 std::vector<StringRef> &bbNames, 376 unsigned &basicBlockNum, NamedVRegCursor &NVC) { 377 378 if (CanonicalizeBasicBlockNumber != ~0U) { 379 if (CanonicalizeBasicBlockNumber != basicBlockNum++) 380 return false; 381 LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() 382 << "\n";); 383 } 384 385 if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { 386 LLVM_DEBUG({ 387 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() 388 << "\n"; 389 }); 390 return false; 391 } 392 393 LLVM_DEBUG({ 394 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 395 dbgs() << "\n\n================================================\n\n"; 396 }); 397 398 bool Changed = false; 399 MachineFunction &MF = *MBB->getParent(); 400 MachineRegisterInfo &MRI = MF.getRegInfo(); 401 402 bbNames.push_back(MBB->getName()); 403 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 404 405 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 406 MBB->dump();); 407 Changed |= propagateLocalCopies(MBB); 408 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 409 410 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 411 unsigned IdempotentInstCount = 0; 412 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 413 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 414 415 Changed |= NVC.renameVRegs(MBB); 416 417 // Here we renumber the def vregs for the idempotent instructions from the top 418 // of the MachineBasicBlock so that they are named in the order that we sorted 419 // them alphabetically. Eventually we wont need SkipVRegs because we will use 420 // named vregs instead. 421 if (IdempotentInstCount) 422 NVC.skipVRegs(); 423 424 auto MII = MBB->begin(); 425 for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { 426 MachineInstr &MI = *MII++; 427 Changed = true; 428 Register vRegToRename = MI.getOperand(0).getReg(); 429 auto Rename = NVC.createVirtualRegister(vRegToRename); 430 431 std::vector<MachineOperand *> RenameMOs; 432 for (auto &MO : MRI.reg_operands(vRegToRename)) { 433 RenameMOs.push_back(&MO); 434 } 435 436 for (auto *MO : RenameMOs) { 437 MO->setReg(Rename); 438 } 439 } 440 441 Changed |= doDefKillClear(MBB); 442 443 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 444 dbgs() << "\n";); 445 LLVM_DEBUG( 446 dbgs() << "\n\n================================================\n\n"); 447 return Changed; 448 } 449 450 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 451 452 static unsigned functionNum = 0; 453 if (CanonicalizeFunctionNumber != ~0U) { 454 if (CanonicalizeFunctionNumber != functionNum++) 455 return false; 456 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 457 << "\n";); 458 } 459 460 // we need a valid vreg to create a vreg type for skipping all those 461 // stray vreg numbers so reach alignment/canonical vreg values. 462 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 463 464 LLVM_DEBUG( 465 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 466 dbgs() << "\n\n================================================\n\n"; 467 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 468 for (auto MBB 469 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 470 << "\n\n================================================\n\n";); 471 472 std::vector<StringRef> BBNames; 473 474 unsigned BBNum = 0; 475 476 bool Changed = false; 477 478 MachineRegisterInfo &MRI = MF.getRegInfo(); 479 NamedVRegCursor NVC(MRI); 480 for (auto MBB : RPOList) 481 Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); 482 483 return Changed; 484 } 485