//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The purpose of this pass is to employ a canonical code transformation so
// that code compiled with slightly different IR passes can be diffed more
// effectively than otherwise. This is done by renaming vregs in a given
// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
// different schedules.
//
// Basic Usage:
//
// llc -o - -run-pass mir-canonicalizer example.mir
//
// Reorders instructions canonically.
// Renames virtual register operands canonically.
// Strips certain MIR artifacts (optionally).
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "MIRVRegNamerUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/CodeGen/MachineFunctionPass.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 using namespace llvm; 37 38 #define DEBUG_TYPE "mir-canonicalizer" 39 40 static cl::opt<unsigned> 41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), 42 cl::value_desc("N"), 43 cl::desc("Function number to canonicalize.")); 44 45 namespace { 46 47 class MIRCanonicalizer : public MachineFunctionPass { 48 public: 49 static char ID; 50 MIRCanonicalizer() : MachineFunctionPass(ID) {} 51 52 StringRef getPassName() const override { 53 return "Rename register operands in a canonical ordering."; 54 } 55 56 void getAnalysisUsage(AnalysisUsage &AU) const override { 57 AU.setPreservesCFG(); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 61 bool runOnMachineFunction(MachineFunction &MF) override; 62 }; 63 64 } // end anonymous namespace 65 66 char MIRCanonicalizer::ID; 67 68 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; 69 70 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", 71 "Rename Register Operands Canonically", false, false) 72 73 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", 74 "Rename Register Operands Canonically", false, false) 75 76 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { 77 if (MF.empty()) 78 return {}; 79 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); 80 std::vector<MachineBasicBlock *> RPOList; 81 append_range(RPOList, RPOT); 82 83 return RPOList; 84 } 85 86 static bool 87 rescheduleLexographically(std::vector<MachineInstr *> instructions, 88 MachineBasicBlock *MBB, 89 
std::function<MachineBasicBlock::iterator()> getPos) { 90 91 bool Changed = false; 92 using StringInstrPair = std::pair<std::string, MachineInstr *>; 93 std::vector<StringInstrPair> StringInstrMap; 94 95 for (auto *II : instructions) { 96 std::string S; 97 raw_string_ostream OS(S); 98 II->print(OS); 99 OS.flush(); 100 101 // Trim the assignment, or start from the beginning in the case of a store. 102 const size_t i = S.find('='); 103 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); 104 } 105 106 llvm::sort(StringInstrMap, 107 [](const StringInstrPair &a, const StringInstrPair &b) -> bool { 108 return (a.first < b.first); 109 }); 110 111 for (auto &II : StringInstrMap) { 112 113 LLVM_DEBUG({ 114 dbgs() << "Splicing "; 115 II.second->dump(); 116 dbgs() << " right before: "; 117 getPos()->dump(); 118 }); 119 120 Changed = true; 121 MBB->splice(getPos(), MBB, II.second); 122 } 123 124 return Changed; 125 } 126 127 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, 128 MachineBasicBlock *MBB) { 129 130 bool Changed = false; 131 132 // Calculates the distance of MI from the beginning of its parent BB. 133 auto getInstrIdx = [](const MachineInstr &MI) { 134 unsigned i = 0; 135 for (auto &CurMI : *MI.getParent()) { 136 if (&CurMI == &MI) 137 return i; 138 i++; 139 } 140 return ~0U; 141 }; 142 143 // Pre-Populate vector of instructions to reschedule so that we don't 144 // clobber the iterator. 
145 std::vector<MachineInstr *> Instructions; 146 for (auto &MI : *MBB) { 147 Instructions.push_back(&MI); 148 } 149 150 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; 151 std::map<unsigned, MachineInstr *> MultiUserLookup; 152 unsigned UseToBringDefCloserToCount = 0; 153 std::vector<MachineInstr *> PseudoIdempotentInstructions; 154 std::vector<unsigned> PhysRegDefs; 155 for (auto *II : Instructions) { 156 for (unsigned i = 1; i < II->getNumOperands(); i++) { 157 MachineOperand &MO = II->getOperand(i); 158 if (!MO.isReg()) 159 continue; 160 161 if (Register::isVirtualRegister(MO.getReg())) 162 continue; 163 164 if (!MO.isDef()) 165 continue; 166 167 PhysRegDefs.push_back(MO.getReg()); 168 } 169 } 170 171 for (auto *II : Instructions) { 172 if (II->getNumOperands() == 0) 173 continue; 174 if (II->mayLoadOrStore()) 175 continue; 176 177 MachineOperand &MO = II->getOperand(0); 178 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 179 continue; 180 if (!MO.isDef()) 181 continue; 182 183 bool IsPseudoIdempotent = true; 184 for (unsigned i = 1; i < II->getNumOperands(); i++) { 185 186 if (II->getOperand(i).isImm()) { 187 continue; 188 } 189 190 if (II->getOperand(i).isReg()) { 191 if (!Register::isVirtualRegister(II->getOperand(i).getReg())) 192 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) { 193 continue; 194 } 195 } 196 197 IsPseudoIdempotent = false; 198 break; 199 } 200 201 if (IsPseudoIdempotent) { 202 PseudoIdempotentInstructions.push_back(II); 203 continue; 204 } 205 206 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); 207 208 MachineInstr *Def = II; 209 unsigned Distance = ~0U; 210 MachineInstr *UseToBringDefCloserTo = nullptr; 211 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); 212 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { 213 MachineInstr *UseInst = UO.getParent(); 214 215 const unsigned DefLoc = getInstrIdx(*Def); 216 const unsigned UseLoc = 
getInstrIdx(*UseInst); 217 const unsigned Delta = (UseLoc - DefLoc); 218 219 if (UseInst->getParent() != Def->getParent()) 220 continue; 221 if (DefLoc >= UseLoc) 222 continue; 223 224 if (Delta < Distance) { 225 Distance = Delta; 226 UseToBringDefCloserTo = UseInst; 227 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; 228 } 229 } 230 231 const auto BBE = MBB->instr_end(); 232 MachineBasicBlock::iterator DefI = BBE; 233 MachineBasicBlock::iterator UseI = BBE; 234 235 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { 236 237 if (DefI != BBE && UseI != BBE) 238 break; 239 240 if (&*BBI == Def) { 241 DefI = BBI; 242 continue; 243 } 244 245 if (&*BBI == UseToBringDefCloserTo) { 246 UseI = BBI; 247 continue; 248 } 249 } 250 251 if (DefI == BBE || UseI == BBE) 252 continue; 253 254 LLVM_DEBUG({ 255 dbgs() << "Splicing "; 256 DefI->dump(); 257 dbgs() << " right before: "; 258 UseI->dump(); 259 }); 260 261 MultiUsers[UseToBringDefCloserTo].push_back(Def); 262 Changed = true; 263 MBB->splice(UseI, MBB, DefI); 264 } 265 266 // Sort the defs for users of multiple defs lexographically. 
267 for (const auto &E : MultiUserLookup) { 268 269 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool { 270 return &MI == E.second; 271 }); 272 273 if (UseI == MBB->instr_end()) 274 continue; 275 276 LLVM_DEBUG( 277 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); 278 Changed |= rescheduleLexographically( 279 MultiUsers[E.second], MBB, 280 [&]() -> MachineBasicBlock::iterator { return UseI; }); 281 } 282 283 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); 284 LLVM_DEBUG( 285 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); 286 Changed |= rescheduleLexographically( 287 PseudoIdempotentInstructions, MBB, 288 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); 289 290 return Changed; 291 } 292 293 static bool propagateLocalCopies(MachineBasicBlock *MBB) { 294 bool Changed = false; 295 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 296 297 std::vector<MachineInstr *> Copies; 298 for (MachineInstr &MI : MBB->instrs()) { 299 if (MI.isCopy()) 300 Copies.push_back(&MI); 301 } 302 303 for (MachineInstr *MI : Copies) { 304 305 if (!MI->getOperand(0).isReg()) 306 continue; 307 if (!MI->getOperand(1).isReg()) 308 continue; 309 310 const Register Dst = MI->getOperand(0).getReg(); 311 const Register Src = MI->getOperand(1).getReg(); 312 313 if (!Register::isVirtualRegister(Dst)) 314 continue; 315 if (!Register::isVirtualRegister(Src)) 316 continue; 317 // Not folding COPY instructions if regbankselect has not set the RCs. 318 // Why are we only considering Register Classes? Because the verifier 319 // sometimes gets upset if the register classes don't match even if the 320 // types do. A future patch might add COPY folding for matching types in 321 // pre-registerbankselect code. 
322 if (!MRI.getRegClassOrNull(Dst)) 323 continue; 324 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) 325 continue; 326 327 std::vector<MachineOperand *> Uses; 328 for (MachineOperand &MO : MRI.use_operands(Dst)) 329 Uses.push_back(&MO); 330 for (auto *MO : Uses) 331 MO->setReg(Src); 332 333 Changed = true; 334 MI->eraseFromParent(); 335 } 336 337 return Changed; 338 } 339 340 static bool doDefKillClear(MachineBasicBlock *MBB) { 341 bool Changed = false; 342 343 for (auto &MI : *MBB) { 344 for (auto &MO : MI.operands()) { 345 if (!MO.isReg()) 346 continue; 347 if (!MO.isDef() && MO.isKill()) { 348 Changed = true; 349 MO.setIsKill(false); 350 } 351 352 if (MO.isDef() && MO.isDead()) { 353 Changed = true; 354 MO.setIsDead(false); 355 } 356 } 357 } 358 359 return Changed; 360 } 361 362 static bool runOnBasicBlock(MachineBasicBlock *MBB, 363 unsigned BasicBlockNum, VRegRenamer &Renamer) { 364 LLVM_DEBUG({ 365 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; 366 dbgs() << "\n\n================================================\n\n"; 367 }); 368 369 bool Changed = false; 370 371 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); 372 373 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; 374 MBB->dump();); 375 Changed |= propagateLocalCopies(MBB); 376 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); 377 378 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); 379 unsigned IdempotentInstCount = 0; 380 Changed |= rescheduleCanonically(IdempotentInstCount, MBB); 381 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); 382 383 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); 384 385 // TODO: Consider dropping this. Dropping kill defs is probably not 386 // semantically sound. 
387 Changed |= doDefKillClear(MBB); 388 389 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); 390 dbgs() << "\n";); 391 LLVM_DEBUG( 392 dbgs() << "\n\n================================================\n\n"); 393 return Changed; 394 } 395 396 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { 397 398 static unsigned functionNum = 0; 399 if (CanonicalizeFunctionNumber != ~0U) { 400 if (CanonicalizeFunctionNumber != functionNum++) 401 return false; 402 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() 403 << "\n";); 404 } 405 406 // we need a valid vreg to create a vreg type for skipping all those 407 // stray vreg numbers so reach alignment/canonical vreg values. 408 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); 409 410 LLVM_DEBUG( 411 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; 412 dbgs() << "\n\n================================================\n\n"; 413 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; 414 for (auto MBB 415 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() 416 << "\n\n================================================\n\n";); 417 418 unsigned BBNum = 0; 419 bool Changed = false; 420 MachineRegisterInfo &MRI = MF.getRegInfo(); 421 VRegRenamer Renamer(MRI); 422 for (auto MBB : RPOList) 423 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); 424 425 return Changed; 426 } 427