1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The purpose of this pass is to employ a canonical code transformation so 11 // that code compiled with slightly different IR passes can be diffed more 12 // effectively than otherwise. This is done by renaming vregs in a given 13 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 14 // move defs closer to their use inorder to reduce diffs caused by slightly 15 // different schedules. 16 // 17 // Basic Usage: 18 // 19 // llc -o - -run-pass mir-canonicalizer example.mir 20 // 21 // Reorders instructions canonically. 22 // Renames virtual register operands canonically. 23 // Strips certain MIR artifacts (optionally). 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 #include <queue> 36 37 using namespace llvm; 38 39 namespace llvm { 40 extern char &MIRCanonicalizerID; 41 } // namespace llvm 42 43 #define DEBUG_TYPE "mir-canonicalizer" 44 45 static cl::opt<unsigned> 46 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 47 cl::value_desc("N"), 48 cl::desc("Function number to canonicalize.")); 49 50 static cl::opt<unsigned> 51 CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), 52 cl::value_desc("N"), 53 cl::desc("BasicBlock number to canonicalize.")); 54 55 namespace { 56 57 class MIRCanonicalizer : public MachineFunctionPass { 58 public: 59 static char ID; 60 MIRCanonicalizer() : MachineFunctionPass(ID) {} 61 62 StringRef getPassName() const override { 63 return "Rename register operands in a canonical ordering."; 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.setPreservesCFG(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 }; 73 74 } // end anonymous namespace 75 76 enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; 77 class TypedVReg { 78 VRType type; 79 unsigned reg; 80 81 public: 82 TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} 83 TypedVReg(VRType type) : type(type), reg(~0U) { 84 assert(type != RSE_Reg && "Expected a non-register type."); 85 } 86 87 bool isReg() const { return type == RSE_Reg; } 88 bool isFrameIndex() const { return type == RSE_FrameIndex; } 89 bool isCandidate() const { return type == RSE_NewCandidate; } 90 91 VRType getType() const { return type; } 92 unsigned getReg() const { 93 assert(this->isReg() && "Expected a virtual or physical register."); 94 return reg; 95 } 96 }; 97 98 char MIRCanonicalizer::ID; 99 100 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 101 102 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 103 "Rename Register Operands Canonically", false, false) 104 105 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 106 "Rename Register Operands Canonically", false, false) 107 108 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 109 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 110 std::vector<MachineBasicBlock *> RPOList; 111 for (auto MBB : RPOT) { 112 RPOList.push_back(MBB); 113 } 114 115 return RPOList; 116 } 117 118 // Set a dummy vreg. We use this vregs register class to generate throw-away 119 // vregs that are used to skip vreg numbers so that vreg numbers line up. 120 static unsigned GetDummyVReg(const MachineFunction &MF) { 121 for (auto &MBB : MF) { 122 for (auto &MI : MBB) { 123 for (auto &MO : MI.operands()) { 124 if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) 125 continue; 126 return MO.getReg(); 127 } 128 } 129 } 130 131 return ~0U; 132 } 133 134 static bool rescheduleCanonically(MachineBasicBlock *MBB) { 135 136 bool Changed = false; 137 138 // Calculates the distance of MI from the begining of its parent BB. 139 auto getInstrIdx = [](const MachineInstr &MI) { 140 unsigned i = 0; 141 for (auto &CurMI : *MI.getParent()) { 142 if (&CurMI == &MI) 143 return i; 144 i++; 145 } 146 return ~0U; 147 }; 148 149 // Pre-Populate vector of instructions to reschedule so that we don't 150 // clobber the iterator. 151 std::vector<MachineInstr *> Instructions; 152 for (auto &MI : *MBB) { 153 Instructions.push_back(&MI); 154 } 155 156 for (auto *II : Instructions) { 157 if (II->getNumOperands() == 0) 158 continue; 159 160 MachineOperand &MO = II->getOperand(0); 161 if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) 162 continue; 163 164 DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 165 166 MachineInstr *Def = II; 167 unsigned Distance = ~0U; 168 MachineInstr *UseToBringDefCloserTo = nullptr; 169 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 170 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 171 MachineInstr *UseInst = UO.getParent(); 172 173 const unsigned DefLoc = getInstrIdx(*Def); 174 const unsigned UseLoc = getInstrIdx(*UseInst); 175 const unsigned Delta = (UseLoc - DefLoc); 176 177 if (UseInst->getParent() != Def->getParent()) 178 continue; 179 if (DefLoc >= UseLoc) 180 continue; 181 182 if (Delta < Distance) { 183 Distance = Delta; 184 UseToBringDefCloserTo = UseInst; 185 } 186 } 187 188 const auto BBE = MBB->instr_end(); 189 MachineBasicBlock::iterator DefI = BBE; 190 MachineBasicBlock::iterator UseI = BBE; 191 192 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 193 194 if (DefI != BBE && UseI != BBE) 195 break; 196 197 if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) 198 continue; 199 200 if (&*BBI == Def) { 201 DefI = BBI; 202 continue; 203 } 204 205 if (&*BBI == UseToBringDefCloserTo) { 206 UseI = BBI; 207 continue; 208 } 209 } 210 211 if (DefI == BBE || UseI == BBE) 212 continue; 213 214 DEBUG({ 215 dbgs() << "Splicing "; 216 DefI->dump(); 217 dbgs() << " right before: "; 218 UseI->dump(); 219 }); 220 221 Changed = true; 222 MBB->splice(UseI, MBB, DefI); 223 } 224 225 return Changed; 226 } 227 228 /// Here we find our candidates. What makes an interesting candidate? 229 /// An candidate for a canonicalization tree root is normally any kind of 230 /// instruction that causes side effects such as a store to memory or a copy to 231 /// a physical register or a return instruction. We use these as an expression 232 /// tree root that we walk inorder to build a canonical walk which should result 233 /// in canoncal vreg renaming. 234 static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { 235 std::vector<MachineInstr *> Candidates; 236 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 237 238 for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { 239 MachineInstr *MI = &*II; 240 241 bool DoesMISideEffect = false; 242 243 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { 244 const unsigned Dst = MI->getOperand(0).getReg(); 245 DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); 246 247 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { 248 if (DoesMISideEffect) break; 249 DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); 250 } 251 } 252 253 if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) 254 continue; 255 256 DEBUG(dbgs() << "Found Candidate: "; MI->dump();); 257 Candidates.push_back(MI); 258 } 259 260 return Candidates; 261 } 262 263 static void doCandidateWalk(std::vector<TypedVReg> &VRegs, 264 std::queue<TypedVReg> &RegQueue, 265 std::vector<MachineInstr *> &VisitedMIs, 266 const MachineBasicBlock *MBB) { 267 268 const MachineFunction &MF = *MBB->getParent(); 269 const MachineRegisterInfo &MRI = MF.getRegInfo(); 270 271 while (!RegQueue.empty()) { 272 273 auto TReg = RegQueue.front(); 274 RegQueue.pop(); 275 276 if (TReg.isFrameIndex()) { 277 DEBUG(dbgs() << "Popping frame index.\n";); 278 VRegs.push_back(TypedVReg(RSE_FrameIndex)); 279 continue; 280 } 281 282 assert(TReg.isReg() && "Expected vreg or physreg."); 283 unsigned Reg = TReg.getReg(); 284 285 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 286 DEBUG({ 287 dbgs() << "Popping vreg "; 288 MRI.def_begin(Reg)->dump(); 289 dbgs() << "\n"; 290 }); 291 292 if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { 293 return TR.isReg() && TR.getReg() == Reg; 294 })) { 295 VRegs.push_back(TypedVReg(Reg)); 296 } 297 } else { 298 DEBUG(dbgs() << "Popping physreg.\n";); 299 VRegs.push_back(TypedVReg(Reg)); 300 continue; 301 } 302 303 for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { 304 MachineInstr *Def = RI->getParent(); 305 306 if (Def->getParent() != MBB) 307 continue; 308 309 if (llvm::any_of(VisitedMIs, 310 [&](const MachineInstr *VMI) { return Def == VMI; })) { 311 break; 312 } 313 314 DEBUG({ 315 dbgs() << "\n========================\n"; 316 dbgs() << "Visited MI: "; 317 Def->dump(); 318 dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; 319 dbgs() << "\n========================\n"; 320 }); 321 VisitedMIs.push_back(Def); 322 for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { 323 324 MachineOperand &MO = Def->getOperand(I); 325 if (MO.isFI()) { 326 DEBUG(dbgs() << "Pushing frame index.\n";); 327 RegQueue.push(TypedVReg(RSE_FrameIndex)); 328 } 329 330 if (!MO.isReg()) 331 continue; 332 RegQueue.push(TypedVReg(MO.getReg())); 333 } 334 } 335 } 336 } 337 338 // TODO: Work to remove this in the future. One day when we have named vregs 339 // we should be able to form the canonical name based on some characteristic 340 // we see in that point of the expression tree (like if we were to name based 341 // on some sort of value numbering scheme). 342 static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, 343 const TargetRegisterClass *RC) { 344 const unsigned VR_GAP = (++VRegGapIndex * 1000); 345 346 DEBUG({ 347 dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " 348 << VR_GAP << "\n"; 349 }); 350 351 unsigned I = MRI.createVirtualRegister(RC); 352 const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; 353 while (I != E) { 354 I = MRI.createVirtualRegister(RC); 355 } 356 } 357 358 static std::map<unsigned, unsigned> 359 GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, 360 const std::vector<unsigned> &renamedInOtherBB, 361 MachineRegisterInfo &MRI, 362 const TargetRegisterClass *RC) { 363 std::map<unsigned, unsigned> VRegRenameMap; 364 unsigned LastRenameReg = MRI.createVirtualRegister(RC); 365 bool FirstCandidate = true; 366 367 for (auto &vreg : VRegs) { 368 if (vreg.isFrameIndex()) { 369 // We skip one vreg for any frame index because there is a good chance 370 // (especially when comparing SelectionDAG to GlobalISel generated MIR) 371 // that in the other file we are just getting an incoming vreg that comes 372 // from a copy from a frame index. So it's safe to skip by one. 373 LastRenameReg = MRI.createVirtualRegister(RC); 374 DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); 375 continue; 376 } else if (vreg.isCandidate()) { 377 378 // After the first candidate, for every subsequent candidate, we skip mod 379 // 10 registers so that the candidates are more likely to start at the 380 // same vreg number making it more likely that the canonical walk from the 381 // candidate insruction. We don't need to skip from the first candidate of 382 // the BasicBlock because we already skip ahead several vregs for each BB. 383 while (LastRenameReg % 10) { 384 if (!FirstCandidate) break; 385 LastRenameReg = MRI.createVirtualRegister(RC); 386 387 DEBUG({ 388 dbgs() << "Skipping rename for new candidate " << LastRenameReg 389 << "\n"; 390 }); 391 } 392 FirstCandidate = false; 393 continue; 394 } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { 395 LastRenameReg = MRI.createVirtualRegister(RC); 396 DEBUG({ 397 dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; 398 }); 399 continue; 400 } 401 402 auto Reg = vreg.getReg(); 403 if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { 404 DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); 405 continue; 406 } 407 408 auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); 409 LastRenameReg = Rename; 410 411 if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { 412 DEBUG(dbgs() << "Mapping vreg ";); 413 if (MRI.reg_begin(Reg) != MRI.reg_end()) { 414 DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); 415 } else { 416 DEBUG(dbgs() << Reg;); 417 } 418 DEBUG(dbgs() << " to ";); 419 if (MRI.reg_begin(Rename) != MRI.reg_end()) { 420 DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); 421 } else { 422 DEBUG(dbgs() << Rename;); 423 } 424 DEBUG(dbgs() << "\n";); 425 426 VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); 427 } 428 } 429 430 return VRegRenameMap; 431 } 432 433 static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, 434 const std::map<unsigned, unsigned> &VRegRenameMap, 435 MachineRegisterInfo &MRI) { 436 bool Changed = false; 437 for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { 438 439 auto VReg = I->first; 440 auto Rename = I->second; 441 442 RenamedInOtherBB.push_back(Rename); 443 444 std::vector<MachineOperand *> RenameMOs; 445 for (auto &MO : MRI.reg_operands(VReg)) { 446 RenameMOs.push_back(&MO); 447 } 448 449 for (auto *MO : RenameMOs) { 450 Changed = true; 451 MO->setReg(Rename); 452 453 if (!MO->isDef()) 454 MO->setIsKill(false); 455 } 456 } 457 458 return Changed; 459 } 460 461 static bool doDefKillClear(MachineBasicBlock *MBB) { 462 bool Changed = false; 463 464 for (auto &MI : *MBB) { 465 for (auto &MO : MI.operands()) { 466 if (!MO.isReg()) 467 continue; 468 if (!MO.isDef() && MO.isKill()) { 469 Changed = true; 470 MO.setIsKill(false); 471 } 472 473 if (MO.isDef() && MO.isDead()) { 474 Changed = true; 475 MO.setIsDead(false); 476 } 477 } 478 } 479 480 return Changed; 481 } 482 483 static bool runOnBasicBlock(MachineBasicBlock *MBB, 484 std::vector<StringRef> &bbNames, 485 std::vector<unsigned> &renamedInOtherBB, 486 unsigned &basicBlockNum, unsigned &VRegGapIndex) { 487 488 if (CanonicalizeBasicBlockNumber != ~0U) { 489 if (CanonicalizeBasicBlockNumber != basicBlockNum++) 490 return false; 491 DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); 492 } 493 494 if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { 495 DEBUG({ 496 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() 497 << "\n"; 498 }); 499 return false; 500 } 501 502 DEBUG({ 503 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 504 dbgs() << "\n\n================================================\n\n"; 505 }); 506 507 bool Changed = false; 508 MachineFunction &MF = *MBB->getParent(); 509 MachineRegisterInfo &MRI = MF.getRegInfo(); 510 511 const unsigned DummyVReg = GetDummyVReg(MF); 512 const TargetRegisterClass *DummyRC = 513 (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg); 514 if (!DummyRC) return false; 515 516 bbNames.push_back(MBB->getName()); 517 DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 518 519 DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 520 Changed |= rescheduleCanonically(MBB); 521 DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 522 523 std::vector<MachineInstr *> Candidates = populateCandidates(MBB); 524 std::vector<MachineInstr *> VisitedMIs; 525 std::copy(Candidates.begin(), Candidates.end(), 526 std::back_inserter(VisitedMIs)); 527 528 std::vector<TypedVReg> VRegs; 529 for (auto candidate : Candidates) { 530 VRegs.push_back(TypedVReg(RSE_NewCandidate)); 531 532 std::queue<TypedVReg> RegQueue; 533 534 // Here we walk the vreg operands of a non-root node along our walk. 535 // The root nodes are the original candidates (stores normally). 536 // These are normally not the root nodes (except for the case of copies to 537 // physical registers). 538 for (unsigned i = 1; i < candidate->getNumOperands(); i++) { 539 if (candidate->mayStore() || candidate->isBranch()) 540 break; 541 542 MachineOperand &MO = candidate->getOperand(i); 543 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) 544 continue; 545 546 DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); 547 RegQueue.push(TypedVReg(MO.getReg())); 548 } 549 550 // Here we walk the root candidates. We start from the 0th operand because 551 // the root is normally a store to a vreg. 552 for (unsigned i = 0; i < candidate->getNumOperands(); i++) { 553 554 if (!candidate->mayStore() && !candidate->isBranch()) 555 break; 556 557 MachineOperand &MO = candidate->getOperand(i); 558 559 // TODO: Do we want to only add vregs here? 560 if (!MO.isReg() && !MO.isFI()) 561 continue; 562 563 DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); 564 565 RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : 566 TypedVReg(RSE_FrameIndex)); 567 } 568 569 doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); 570 } 571 572 // If we have populated no vregs to rename then bail. 573 // The rest of this function does the vreg remaping. 574 if (VRegs.size() == 0) 575 return Changed; 576 577 // Skip some vregs, so we can recon where we'll land next. 578 SkipVRegs(VRegGapIndex, MRI, DummyRC); 579 580 auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); 581 Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); 582 Changed |= doDefKillClear(MBB); 583 584 DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); 585 DEBUG(dbgs() << "\n\n================================================\n\n"); 586 return Changed; 587 } 588 589 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 590 591 static unsigned functionNum = 0; 592 if (CanonicalizeFunctionNumber != ~0U) { 593 if (CanonicalizeFunctionNumber != functionNum++) 594 return false; 595 DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); 596 } 597 598 // we need a valid vreg to create a vreg type for skipping all those 599 // stray vreg numbers so reach alignment/canonical vreg values. 600 std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); 601 602 DEBUG( 603 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 604 dbgs() << "\n\n================================================\n\n"; 605 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 606 for (auto MBB : RPOList) { 607 dbgs() << MBB->getName() << "\n"; 608 } 609 dbgs() << "\n\n================================================\n\n"; 610 ); 611 612 std::vector<StringRef> BBNames; 613 std::vector<unsigned> RenamedInOtherBB; 614 615 unsigned GapIdx = 0; 616 unsigned BBNum = 0; 617 618 bool Changed = false; 619 620 for (auto MBB : RPOList) 621 Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); 622 623 return Changed; 624 } 625 626