1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The purpose of this pass is to employ a canonical code transformation so 11 // that code compiled with slightly different IR passes can be diffed more 12 // effectively than otherwise. This is done by renaming vregs in a given 13 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to 14 // move defs closer to their use inorder to reduce diffs caused by slightly 15 // different schedules. 16 // 17 // Basic Usage: 18 // 19 // llc -o - -run-pass mir-canonicalizer example.mir 20 // 21 // Reorders instructions canonically. 22 // Renames virtual register operands canonically. 23 // Strips certain MIR artifacts (optionally). 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 #include <queue> 37 38 using namespace llvm; 39 40 namespace llvm { 41 extern char &MIRCanonicalizerID; 42 } // namespace llvm 43 44 #define DEBUG_TYPE "mir-canonicalizer" 45 46 static cl::opt<unsigned> 47 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 48 cl::value_desc("N"), 49 cl::desc("Function number to canonicalize.")); 50 51 static cl::opt<unsigned> 52 CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), 53 cl::value_desc("N"), 54 cl::desc("BasicBlock number to canonicalize.")); 55 56 namespace { 57 58 class MIRCanonicalizer : public MachineFunctionPass { 59 public: 60 static char ID; 61 MIRCanonicalizer() : MachineFunctionPass(ID) {} 62 63 StringRef getPassName() const override { 64 return "Rename register operands in a canonical ordering."; 65 } 66 67 void getAnalysisUsage(AnalysisUsage &AU) const override { 68 AU.setPreservesCFG(); 69 MachineFunctionPass::getAnalysisUsage(AU); 70 } 71 72 bool runOnMachineFunction(MachineFunction &MF) override; 73 }; 74 75 } // end anonymous namespace 76 77 enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; 78 class TypedVReg { 79 VRType type; 80 unsigned reg; 81 82 public: 83 TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} 84 TypedVReg(VRType type) : type(type), reg(~0U) { 85 assert(type != RSE_Reg && "Expected a non-register type."); 86 } 87 88 bool isReg() const { return type == RSE_Reg; } 89 bool isFrameIndex() const { return type == RSE_FrameIndex; } 90 bool isCandidate() const { return type == RSE_NewCandidate; } 91 92 VRType getType() const { return type; } 93 unsigned getReg() const { 94 assert(this->isReg() && "Expected a virtual or physical register."); 95 return reg; 96 } 97 }; 98 99 char MIRCanonicalizer::ID; 100 101 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 102 103 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 104 "Rename Register Operands Canonically", false, false) 105 106 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 107 "Rename Register Operands Canonically", false, false) 108 109 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 110 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 111 std::vector<MachineBasicBlock *> RPOList; 112 for (auto MBB : RPOT) { 113 RPOList.push_back(MBB); 114 } 115 116 return RPOList; 117 } 118 119 // Set a dummy vreg. We use this vregs register class to generate throw-away 120 // vregs that are used to skip vreg numbers so that vreg numbers line up. 121 static unsigned GetDummyVReg(const MachineFunction &MF) { 122 for (auto &MBB : MF) { 123 for (auto &MI : MBB) { 124 for (auto &MO : MI.operands()) { 125 if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) 126 continue; 127 return MO.getReg(); 128 } 129 } 130 } 131 132 return ~0U; 133 } 134 135 static bool rescheduleCanonically(MachineBasicBlock *MBB) { 136 137 bool Changed = false; 138 139 // Calculates the distance of MI from the begining of its parent BB. 140 auto getInstrIdx = [](const MachineInstr &MI) { 141 unsigned i = 0; 142 for (auto &CurMI : *MI.getParent()) { 143 if (&CurMI == &MI) 144 return i; 145 i++; 146 } 147 return ~0U; 148 }; 149 150 // Pre-Populate vector of instructions to reschedule so that we don't 151 // clobber the iterator. 152 std::vector<MachineInstr *> Instructions; 153 for (auto &MI : *MBB) { 154 Instructions.push_back(&MI); 155 } 156 157 for (auto *II : Instructions) { 158 if (II->getNumOperands() == 0) 159 continue; 160 161 MachineOperand &MO = II->getOperand(0); 162 if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) 163 continue; 164 165 DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 166 167 MachineInstr *Def = II; 168 unsigned Distance = ~0U; 169 MachineInstr *UseToBringDefCloserTo = nullptr; 170 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 171 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 172 MachineInstr *UseInst = UO.getParent(); 173 174 const unsigned DefLoc = getInstrIdx(*Def); 175 const unsigned UseLoc = getInstrIdx(*UseInst); 176 const unsigned Delta = (UseLoc - DefLoc); 177 178 if (UseInst->getParent() != Def->getParent()) 179 continue; 180 if (DefLoc >= UseLoc) 181 continue; 182 183 if (Delta < Distance) { 184 Distance = Delta; 185 UseToBringDefCloserTo = UseInst; 186 } 187 } 188 189 const auto BBE = MBB->instr_end(); 190 MachineBasicBlock::iterator DefI = BBE; 191 MachineBasicBlock::iterator UseI = BBE; 192 193 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 194 195 if (DefI != BBE && UseI != BBE) 196 break; 197 198 if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) 199 continue; 200 201 if (&*BBI == Def) { 202 DefI = BBI; 203 continue; 204 } 205 206 if (&*BBI == UseToBringDefCloserTo) { 207 UseI = BBI; 208 continue; 209 } 210 } 211 212 if (DefI == BBE || UseI == BBE) 213 continue; 214 215 DEBUG({ 216 dbgs() << "Splicing "; 217 DefI->dump(); 218 dbgs() << " right before: "; 219 UseI->dump(); 220 }); 221 222 Changed = true; 223 MBB->splice(UseI, MBB, DefI); 224 } 225 226 return Changed; 227 } 228 229 /// Here we find our candidates. What makes an interesting candidate? 230 /// An candidate for a canonicalization tree root is normally any kind of 231 /// instruction that causes side effects such as a store to memory or a copy to 232 /// a physical register or a return instruction. We use these as an expression 233 /// tree root that we walk inorder to build a canonical walk which should result 234 /// in canoncal vreg renaming. 235 static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { 236 std::vector<MachineInstr *> Candidates; 237 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 238 239 for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { 240 MachineInstr *MI = &*II; 241 242 bool DoesMISideEffect = false; 243 244 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { 245 const unsigned Dst = MI->getOperand(0).getReg(); 246 DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); 247 248 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { 249 if (DoesMISideEffect) break; 250 DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); 251 } 252 } 253 254 if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) 255 continue; 256 257 DEBUG(dbgs() << "Found Candidate: "; MI->dump();); 258 Candidates.push_back(MI); 259 } 260 261 return Candidates; 262 } 263 264 void doCandidateWalk(std::vector<TypedVReg> &VRegs, 265 std::queue <TypedVReg> &RegQueue, 266 std::vector<MachineInstr *> &VisitedMIs, 267 const MachineBasicBlock *MBB) { 268 269 const MachineFunction &MF = *MBB->getParent(); 270 const MachineRegisterInfo &MRI = MF.getRegInfo(); 271 272 while (!RegQueue.empty()) { 273 274 auto TReg = RegQueue.front(); 275 RegQueue.pop(); 276 277 if (TReg.isFrameIndex()) { 278 DEBUG(dbgs() << "Popping frame index.\n";); 279 VRegs.push_back(TypedVReg(RSE_FrameIndex)); 280 continue; 281 } 282 283 assert(TReg.isReg() && "Expected vreg or physreg."); 284 unsigned Reg = TReg.getReg(); 285 286 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 287 DEBUG({ 288 dbgs() << "Popping vreg "; 289 MRI.def_begin(Reg)->dump(); 290 dbgs() << "\n"; 291 }); 292 293 if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { 294 return TR.isReg() && TR.getReg() == Reg; 295 })) { 296 VRegs.push_back(TypedVReg(Reg)); 297 } 298 } else { 299 DEBUG(dbgs() << "Popping physreg.\n";); 300 VRegs.push_back(TypedVReg(Reg)); 301 continue; 302 } 303 304 for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { 305 MachineInstr *Def = RI->getParent(); 306 307 if (Def->getParent() != MBB) 308 continue; 309 310 if (llvm::any_of(VisitedMIs, 311 [&](const MachineInstr *VMI) { return Def == VMI; })) { 312 break; 313 } 314 315 DEBUG({ 316 dbgs() << "\n========================\n"; 317 dbgs() << "Visited MI: "; 318 Def->dump(); 319 dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; 320 dbgs() << "\n========================\n"; 321 }); 322 VisitedMIs.push_back(Def); 323 for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { 324 325 MachineOperand &MO = Def->getOperand(I); 326 if (MO.isFI()) { 327 DEBUG(dbgs() << "Pushing frame index.\n";); 328 RegQueue.push(TypedVReg(RSE_FrameIndex)); 329 } 330 331 if (!MO.isReg()) 332 continue; 333 RegQueue.push(TypedVReg(MO.getReg())); 334 } 335 } 336 } 337 } 338 339 // TODO: Work to remove this in the future. One day when we have named vregs 340 // we should be able to form the canonical name based on some characteristic 341 // we see in that point of the expression tree (like if we were to name based 342 // on some sort of value numbering scheme). 343 static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, 344 const TargetRegisterClass *RC) { 345 const unsigned VR_GAP = (++VRegGapIndex * 1000); 346 347 DEBUG({ 348 dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " 349 << VR_GAP << "\n"; 350 }); 351 352 unsigned I = MRI.createVirtualRegister(RC); 353 const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; 354 while (I != E) { 355 I = MRI.createVirtualRegister(RC); 356 } 357 } 358 359 static std::map<unsigned, unsigned> 360 GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, 361 const std::vector<unsigned> &renamedInOtherBB, 362 MachineRegisterInfo &MRI, 363 const TargetRegisterClass *RC) { 364 std::map<unsigned, unsigned> VRegRenameMap; 365 unsigned LastRenameReg = MRI.createVirtualRegister(RC); 366 bool FirstCandidate = true; 367 368 for (auto &vreg : VRegs) { 369 if (vreg.isFrameIndex()) { 370 // We skip one vreg for any frame index because there is a good chance 371 // (especially when comparing SelectionDAG to GlobalISel generated MIR) 372 // that in the other file we are just getting an incoming vreg that comes 373 // from a copy from a frame index. So it's safe to skip by one. 374 LastRenameReg = MRI.createVirtualRegister(RC); 375 DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); 376 continue; 377 } else if (vreg.isCandidate()) { 378 379 // After the first candidate, for every subsequent candidate, we skip mod 380 // 10 registers so that the candidates are more likely to start at the 381 // same vreg number making it more likely that the canonical walk from the 382 // candidate insruction. We don't need to skip from the first candidate of 383 // the BasicBlock because we already skip ahead several vregs for each BB. 384 while (LastRenameReg % 10) { 385 if (!FirstCandidate) break; 386 LastRenameReg = MRI.createVirtualRegister(RC); 387 388 DEBUG({ 389 dbgs() << "Skipping rename for new candidate " << LastRenameReg 390 << "\n"; 391 }); 392 } 393 FirstCandidate = false; 394 continue; 395 } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { 396 LastRenameReg = MRI.createVirtualRegister(RC); 397 DEBUG({ 398 dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; 399 }); 400 continue; 401 } 402 403 auto Reg = vreg.getReg(); 404 if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { 405 DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); 406 continue; 407 } 408 409 auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); 410 LastRenameReg = Rename; 411 412 if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { 413 DEBUG(dbgs() << "Mapping vreg ";); 414 if (MRI.reg_begin(Reg) != MRI.reg_end()) { 415 DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); 416 } else { 417 DEBUG(dbgs() << Reg;); 418 } 419 DEBUG(dbgs() << " to ";); 420 if (MRI.reg_begin(Rename) != MRI.reg_end()) { 421 DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); 422 } else { 423 DEBUG(dbgs() << Rename;); 424 } 425 DEBUG(dbgs() << "\n";); 426 427 VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); 428 } 429 } 430 431 return VRegRenameMap; 432 } 433 434 static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, 435 const std::map<unsigned, unsigned> &VRegRenameMap, 436 MachineRegisterInfo &MRI) { 437 bool Changed = false; 438 for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { 439 440 auto VReg = I->first; 441 auto Rename = I->second; 442 443 RenamedInOtherBB.push_back(Rename); 444 445 std::vector<MachineOperand *> RenameMOs; 446 for (auto &MO : MRI.reg_operands(VReg)) { 447 RenameMOs.push_back(&MO); 448 } 449 450 for (auto *MO : RenameMOs) { 451 Changed = true; 452 MO->setReg(Rename); 453 454 if (!MO->isDef()) 455 MO->setIsKill(false); 456 } 457 } 458 459 return Changed; 460 } 461 462 static bool doDefKillClear(MachineBasicBlock *MBB) { 463 bool Changed = false; 464 465 for (auto &MI : *MBB) { 466 for (auto &MO : MI.operands()) { 467 if (!MO.isReg()) 468 continue; 469 if (!MO.isDef() && MO.isKill()) { 470 Changed = true; 471 MO.setIsKill(false); 472 } 473 474 if (MO.isDef() && MO.isDead()) { 475 Changed = true; 476 MO.setIsDead(false); 477 } 478 } 479 } 480 481 return Changed; 482 } 483 484 static bool runOnBasicBlock(MachineBasicBlock *MBB, 485 std::vector<StringRef> &bbNames, 486 std::vector<unsigned> &renamedInOtherBB, 487 unsigned &basicBlockNum, unsigned &VRegGapIndex) { 488 489 if (CanonicalizeBasicBlockNumber != ~0U) { 490 if (CanonicalizeBasicBlockNumber != basicBlockNum++) 491 return false; 492 DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); 493 } 494 495 if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { 496 DEBUG({ 497 dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() 498 << "\n"; 499 }); 500 return false; 501 } 502 503 DEBUG({ 504 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 505 dbgs() << "\n\n================================================\n\n"; 506 }); 507 508 bool Changed = false; 509 MachineFunction &MF = *MBB->getParent(); 510 MachineRegisterInfo &MRI = MF.getRegInfo(); 511 512 const unsigned DummyVReg = GetDummyVReg(MF); 513 const TargetRegisterClass *DummyRC = 514 (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg); 515 if (!DummyRC) return false; 516 517 bbNames.push_back(MBB->getName()); 518 DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 519 520 DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 521 Changed |= rescheduleCanonically(MBB); 522 DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 523 524 std::vector<MachineInstr *> Candidates = populateCandidates(MBB); 525 std::vector<MachineInstr *> VisitedMIs; 526 std::copy(Candidates.begin(), Candidates.end(), 527 std::back_inserter(VisitedMIs)); 528 529 std::vector<TypedVReg> VRegs; 530 for (auto candidate : Candidates) { 531 VRegs.push_back(TypedVReg(RSE_NewCandidate)); 532 533 std::queue<TypedVReg> RegQueue; 534 535 // Here we walk the vreg operands of a non-root node along our walk. 536 // The root nodes are the original candidates (stores normally). 537 // These are normally not the root nodes (except for the case of copies to 538 // physical registers). 539 for (unsigned i = 1; i < candidate->getNumOperands(); i++) { 540 if (candidate->mayStore() || candidate->isBranch()) 541 break; 542 543 MachineOperand &MO = candidate->getOperand(i); 544 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) 545 continue; 546 547 DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); 548 RegQueue.push(TypedVReg(MO.getReg())); 549 } 550 551 // Here we walk the root candidates. We start from the 0th operand because 552 // the root is normally a store to a vreg. 553 for (unsigned i = 0; i < candidate->getNumOperands(); i++) { 554 555 if (!candidate->mayStore() && !candidate->isBranch()) 556 break; 557 558 MachineOperand &MO = candidate->getOperand(i); 559 560 // TODO: Do we want to only add vregs here? 561 if (!MO.isReg() && !MO.isFI()) 562 continue; 563 564 DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); 565 566 RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : 567 TypedVReg(RSE_FrameIndex)); 568 } 569 570 doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); 571 } 572 573 // If we have populated no vregs to rename then bail. 574 // The rest of this function does the vreg remaping. 575 if (VRegs.size() == 0) 576 return Changed; 577 578 // Skip some vregs, so we can recon where we'll land next. 579 SkipVRegs(VRegGapIndex, MRI, DummyRC); 580 581 auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); 582 Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); 583 Changed |= doDefKillClear(MBB); 584 585 DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); 586 DEBUG(dbgs() << "\n\n================================================\n\n"); 587 return Changed; 588 } 589 590 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 591 592 static unsigned functionNum = 0; 593 if (CanonicalizeFunctionNumber != ~0U) { 594 if (CanonicalizeFunctionNumber != functionNum++) 595 return false; 596 DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); 597 } 598 599 // we need a valid vreg to create a vreg type for skipping all those 600 // stray vreg numbers so reach alignment/canonical vreg values. 601 std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); 602 603 DEBUG( 604 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 605 dbgs() << "\n\n================================================\n\n"; 606 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 607 for (auto MBB : RPOList) { 608 dbgs() << MBB->getName() << "\n"; 609 } 610 dbgs() << "\n\n================================================\n\n"; 611 ); 612 613 std::vector<StringRef> BBNames; 614 std::vector<unsigned> RenamedInOtherBB; 615 616 unsigned GapIdx = 0; 617 unsigned BBNum = 0; 618 619 bool Changed = false; 620 621 for (auto MBB : RPOList) 622 Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); 623 624 return Changed; 625 } 626 627