//===- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving
/// individual flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");

namespace llvm {

void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);

} // end namespace llvm

namespace {

// Convenient array type for storing registers associated with each condition.
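// The array is indexed by X86::CondCode: for example, CondRegs[X86::COND_B]
// names the virtual GR8 register (if any) holding the result of a SETB
// inserted below, and a value of zero means no register has been allocated
// for that condition yet.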
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetRegisterClass *PromoteRC;
  MachineDominatorTree *MDT;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineInstr &CopyDefI);

  unsigned promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            DebugLoc TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool>
  getCondOrInverseInReg(MachineBasicBlock &TestMBB,
                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                        X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  DebugLoc Loc, unsigned Reg);

  void rewriteArithmetic(MachineBasicBlock &TestMBB,
                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                         MachineInstr &MI, MachineOperand &FlagUse,
                         CondRegArray &CondRegs);
  void rewriteCMov(MachineBasicBlock &TestMBB,
                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
                    CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

namespace {
/// An enumeration of the arithmetic instruction mnemonics which have
/// interesting flag semantics.
///
/// We can map instruction opcodes into these mnemonics to make it easy to
/// dispatch with specific functionality.
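///
/// For example, the register, memory, and immediate forms of ADC at every
/// width (X86::ADC32rr, X86::ADC32rm, X86::ADC32ri, and so on) all map to the
/// single FlagArithMnemonic::ADC entry below.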
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
};
} // namespace

static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");

#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \
  case X86::MNEMONIC##8##SUFFIX: \
  case X86::MNEMONIC##16##SUFFIX: \
  case X86::MNEMONIC##32##SUFFIX: \
  case X86::MNEMONIC##64##SUFFIX:

#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
  case X86::MNEMONIC##8ri: \
  case X86::MNEMONIC##16ri8: \
  case X86::MNEMONIC##32ri8: \
  case X86::MNEMONIC##64ri8: \
  case X86::MNEMONIC##16ri: \
  case X86::MNEMONIC##32ri: \
  case X86::MNEMONIC##64ri32: \
  case X86::MNEMONIC##8mi: \
  case X86::MNEMONIC##16mi8: \
  case X86::MNEMONIC##32mi8: \
  case X86::MNEMONIC##64mi8: \
  case X86::MNEMONIC##16mi: \
  case X86::MNEMONIC##32mi: \
  case X86::MNEMONIC##64mi32: \
  case X86::MNEMONIC##8i8: \
  case X86::MNEMONIC##16i16: \
  case X86::MNEMONIC##32i32: \
  case X86::MNEMONIC##64i32:

    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  }
}

static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
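  //
  // As an illustrative sketch, if `MBB` ends with two conditional jumps to the
  // same block:
  //
  //   JE_1 %bb.2
  //   JG_1 %bb.2   <- SplitI
  //
  // then both the tail being split off and the remaining block still target
  // %bb.2, so the single MBB -> %bb.2 edge becomes two edges, and the PHIs in
  // %bb.2 need an extra incoming entry for the new block rather than having
  // their existing entry retargeted.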
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyway.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge append a new successor.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
               << " **********\n");

  auto &Subtarget = MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  SmallVector<MachineInstr *, 4> Copies;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.push_back(&MI);

  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in
      // theory handle PHI nodes, but it gets really, really hard. Insanely
      // hard. Hard enough that it is probably better to change every other
      // part of LLVM to avoid creating them. The issue is that once we have
      // PHIs we won't know which original EFLAGS value we need to capture
      // with our setCCs below. The end result would be computing a complete
      // set of setCCs that we *might* want, computing them in every place
      // where we copy *out* of EFLAGS, and then doing SSA formation on all of
      // them to insert the necessary PHI nodes and consume those here. Then
      // we would hope that somehow we DCE the unnecessary ones. This DCE
      // seems very unlikely to be successful and so we would almost certainly
      // end up with a glut of dead setCC instructions. Until we have a
      // motivating test case and fail to avoid it by changing other parts of
      // LLVM's lowering, we refuse to handle this complex case here.
      DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
            CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; erase the copy into
      // EFLAGS and, if it is now dead, the copy out of EFLAGS as well.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock &TestMBB = *CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Scan for usage of newly set EFLAGS so we can rewrite them. We just
    // buffer jumps because their usage is very constrained.
    bool FlagsKilled = false;
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);
    VisitedBlocks.insert(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // We currently don't do any PHI insertion and so we require that the
      // test basic block dominates all of the use basic blocks.
      //
      // We could in theory do PHI insertion here if it becomes useful by just
      // taking undef values in along every edge that we don't trace this
      // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
      // but still seems like a great deal of complexity.
      //
      // Because it is theoretically possible that some earlier MI pass or
      // other lowering transformation could induce this to happen, we do
      // a hard check even in non-debug builds here.
      if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
        DEBUG({
          dbgs() << "ERROR: Encountered use that is not dominated by our test "
                    "basic block! Rewriting this would require inserting PHI "
                    "nodes to track the flag state across the CFG.\n\nTest "
                    "block:\n";
          TestMBB.dump();
          dbgs() << "Use block:\n";
          UseMBB.dump();
        });
        report_fatal_error("Cannot lower EFLAGS copy when original copy def "
                           "does not dominate all uses.");
      }

      for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
                                      : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
        if (!FlagUse) {
          if (MI.findRegisterDefOperand(X86::EFLAGS)) {
            // If EFLAGS are defined, it's as-if they were killed. We can stop
            // scanning here.
            //
            // NB!!! Many instructions only modify some flags. LLVM currently
            // models this as clobbering all flags, but if that ever changes
            // this will need to be carefully updated to handle that more
            // complex logic.
            FlagsKilled = true;
            break;
          }
          continue;
        }

        DEBUG(dbgs() << "  Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also
        // be branches. We can't rewrite in place here, so we handle them
        // below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until
        // post-RA branch folding or block placement. As a consequence, we get
        // to deal with the simpler formulation of conditional branches
        // followed by tail calls.
        if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
                       X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
          rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
                   X86::COND_INVALID) {
          rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
          // We assume that arithmetic instructions that use flags also def
          // them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

          // NB!!! Several arithmetic instructions only *partially* update
          // flags. Theoretically, we could generate MI code sequences that
          // would rely on this fact and observe different flags independently.
          // But currently LLVM models all of these instructions as clobbering
          // all the flags in an undef way. We rely on that to simplify the
          // logic.
          FlagsKilled = true;

          rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
          break;
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        break;

      // Otherwise we need to scan successors for ones where the flags are
      // live-in and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second)
          Blocks.push_back(SuccMBB);
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray
X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB,
                                             MachineInstr &CopyDefI) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI : llvm::reverse(
           llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) {
    X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
    if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
        TRI->isVirtualRegister(MI.getOperand(0).getReg()))
      CondRegs[Cond] = MI.getOperand(0).getReg();

    // Stop scanning when we see the first definition of EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS))
      break;
  }
  return CondRegs;
}

unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond) {
  unsigned Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
                      TII->get(X86::getSETFromCond(Cond)), Reg);
  (void)SetI;
  DEBUG(dbgs() << "  save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          DebugLoc Loc, unsigned Reg) {
  // We emit test instructions as register/immediate test against -1. This
  // allows register allocation to fold a memory operand if needed (that will
  // happen often due to the places this code is emitted). But hopefully this
  // will also allow us to select a shorter encoding of `testb %reg, %reg`
  // when that would be equivalent.
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  DEBUG(dbgs() << "  test cond: "; TestI->dump());
  ++NumTestsInserted;
}

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF. Figure out which condition we need
  // to preserve in a register.
  X86::CondCode Cond;

  // The addend to use to reset CF or OF when added to the flag value.
  int Addend;

  switch (getMnemonicFromOpcode(MI.getOpcode())) {
  case FlagArithMnemonic::ADC:
  case FlagArithMnemonic::ADCX:
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend that when one is added will need a carry due to not
    // having a higher bit available.
    Addend = 255;
    break;

  case FlagArithMnemonic::ADOX:
    Cond = X86::COND_O; // OF == 1
    // Set up an addend that when one is added will turn from positive to
    // negative and thus overflow in the signed domain.
    Addend = 127;
    break;
  }

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  MachineBasicBlock &MBB = *MI.getParent();

  // Insert an instruction that will set the flag back to the desired value.
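  //
  // As a worked example of the addend choice above: if CondReg holds 1, then
  // 1 + 255 = 256 does not fit in 8 bits, so the ADD sets CF again, while
  // 0 + 255 = 255 leaves CF clear. Likewise, 1 + 127 = 128 overflows the
  // signed 8-bit range and sets OF, while 0 + 127 = 127 does not.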
  unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  DEBUG(dbgs() << "  add cond: "; AddI->dump());
  ++NumAddsInserted;
  FlagUse.setIsKill(true);
}

void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                           MachineBasicBlock::iterator TestPos,
                                           DebugLoc TestLoc,
                                           MachineInstr &CMovI,
                                           MachineOperand &FlagUse,
                                           CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &MBB = *CMovI.getParent();

  // Insert a direct test of the saved register.
  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);

  // Rewrite the CMov to use the !ZF flag from the test (but match register
  // size and memory operand), and then kill its use of the flags afterward.
  auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
  CMovI.setDesc(TII->get(X86::getCMovFromCond(
      Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
      !CMovI.memoperands_empty())));
  FlagUse.setIsKill(true);
  DEBUG(dbgs() << "  fixed cmov: "; CMovI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &JmpMBB = *JmpI.getParent();

  // Insert a direct test of the saved register.
  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);

  // Rewrite the jump to use the !ZF flag from the test, and kill its use of
  // flags afterward.
  JmpI.setDesc(TII->get(
      X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
  const int ImplicitEFLAGSOpIdx = 1;
  JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
  DEBUG(dbgs() << "  fixed jCC: "; JmpI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
                                           MachineOperand &FlagUse,
                                           MachineInstr &CopyDefI) {
  // Just replace this copy with the original copy def.
  MRI->replaceRegWith(MI.getOperand(0).getReg(),
                      CopyDefI.getOperand(0).getReg());
  MI.eraseFromParent();
}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
                                            MachineInstr &SetCCI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on duplicates that
  // could have been avoided having already been avoided by this point.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!SetCCI.mayStore()) {
    assert(SetCCI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
    SetCCI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
                     SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(SetCCI.getOperand(i));

  MIB.addReg(CondReg);

  MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());

  SetCCI.eraseFromParent();
  return;
}