//===- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving
/// individual flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");

namespace llvm {

void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);

} // end namespace llvm

namespace {

// Convenient array type for storing registers associated with each condition.
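// Indexed by X86::CondCode; an entry of zero means no virtual register
// currently holds that condition (virtual register numbers are never zero,
// so zero is a safe sentinel).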
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetRegisterClass *PromoteRC;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineInstr &CopyDefI);

  unsigned promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            DebugLoc TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool>
  getCondOrInverseInReg(MachineBasicBlock &TestMBB,
                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                        X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  DebugLoc Loc, unsigned Reg);

  void rewriteArithmetic(MachineBasicBlock &TestMBB,
                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                         MachineInstr &MI, MachineOperand &FlagUse,
                         CondRegArray &CondRegs);
  void rewriteCMov(MachineBasicBlock &TestMBB,
                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
                    CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}

namespace {
/// An enumeration of the arithmetic instruction mnemonics which have
/// interesting flag semantics.
///
/// We can map instruction opcodes into these mnemonics to make it easy to
/// dispatch to the specific functionality we implement for each one.
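///
/// For example, `adc` consumes CF as an input (dst = dst + src + CF), `adox`
/// consumes OF, and `rcl`/`rcr` rotate through CF, so lowering a copy used by
/// one of these requires rematerializing the exact input flag value rather
/// than merely branching on it.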
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
};
} // namespace

static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");

#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
  case X86::MNEMONIC##8##SUFFIX:                                               \
  case X86::MNEMONIC##16##SUFFIX:                                              \
  case X86::MNEMONIC##32##SUFFIX:                                              \
  case X86::MNEMONIC##64##SUFFIX:

#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
  case X86::MNEMONIC##8ri:                                                     \
  case X86::MNEMONIC##16ri8:                                                   \
  case X86::MNEMONIC##32ri8:                                                   \
  case X86::MNEMONIC##64ri8:                                                   \
  case X86::MNEMONIC##16ri:                                                    \
  case X86::MNEMONIC##32ri:                                                    \
  case X86::MNEMONIC##64ri32:                                                  \
  case X86::MNEMONIC##8mi:                                                     \
  case X86::MNEMONIC##16mi8:                                                   \
  case X86::MNEMONIC##32mi8:                                                   \
  case X86::MNEMONIC##64mi8:                                                   \
  case X86::MNEMONIC##16mi:                                                    \
  case X86::MNEMONIC##32mi:                                                    \
  case X86::MNEMONIC##64mi32:                                                  \
  case X86::MNEMONIC##8i8:                                                     \
  case X86::MNEMONIC##16i16:                                                   \
  case X86::MNEMONIC##32i32:                                                   \
  case X86::MNEMONIC##64i32:

    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  }
}

static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the instruction that immediately precedes the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
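  // For example, if the terminators are `jne %bb.2; jl %bb.2`, splitting at
  // the second jump leaves both the original block and the new block ending
  // with a branch to %bb.2, turning the single CFG edge to %bb.2 into two.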
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyway.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge, append a new incoming entry for the
        // new predecessor block.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
               << " **********\n");

  auto &Subtarget = MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();
  PromoteRC = &X86::GR8RegClass;
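  // Saved conditions are materialized with setCC, which defines an 8-bit
  // register, so GR8 is the natural class for them; they are consumed later
  // by 8-bit TEST/ADD instructions.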
  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  SmallVector<MachineInstr *, 4> Copies;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.push_back(&MI);

  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in
      // theory handle PHI nodes, but it gets really, really hard. Insanely
      // hard. Hard enough that it is probably better to change every other
      // part of LLVM to avoid creating them. The issue is that once we have
      // PHIs we won't know which original EFLAGS value we need to capture
      // with our setCCs below. The end result will be computing a complete
      // set of setCCs that we *might* want, computing them in every place
      // where we copy *out* of EFLAGS and then doing SSA formation on all of
      // them to insert necessary PHI nodes and consume those here. Then
      // hoping that somehow we DCE the unnecessary ones. This DCE seems very
      // unlikely to be successful and so we will almost certainly end up with
      // a glut of dead setCC instructions. Until we have a motivating test
      // case and fail to avoid it by changing other parts of LLVM's lowering,
      // we refuse to handle this complex case here.
      DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
            CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten: erase the copy into
      // EFLAGS, and the copy out of EFLAGS as well if it is now dead.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock &TestMBB = *CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Scan for usage of newly set EFLAGS so we can rewrite them. We just
    // buffer jumps because their usage is very constrained.
    bool FlagsKilled = false;
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
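    // For example, if TestMBB already contains a `%b = SETBr` before the
    // copy's def, COND_B maps to %b here and we can reuse it rather than
    // inserting a duplicate setcc.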
    for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
         MII != MIE;) {
      MachineInstr &MI = *MII++;
      MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
      if (!FlagUse) {
        if (MI.findRegisterDefOperand(X86::EFLAGS)) {
          // If EFLAGS are defined, it's as if they were killed. We can stop
          // scanning here.
          //
          // NB!!! Many instructions only modify some flags. LLVM currently
          // models this as clobbering all flags, but if that ever changes
          // this will need to be carefully updated to handle that more
          // complex logic.
          FlagsKilled = true;
          break;
        }
        continue;
      }

      DEBUG(dbgs() << "  Rewriting use: "; MI.dump());

      // Check the kill flag before we rewrite as that may change it.
      if (FlagUse->isKill())
        FlagsKilled = true;

      // Once we encounter a branch, the rest of the instructions must also be
      // branches. We can't rewrite in place here, so we handle them below.
      //
      // Note that we don't have to handle tail calls here, even conditional
      // tail calls, as those are not introduced into the X86 MI until post-RA
      // branch folding or block placement. As a consequence, we get to deal
      // with the simpler formulation of conditional branches followed by tail
      // calls.
      if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
        auto JmpIt = MI.getIterator();
        do {
          JmpIs.push_back(&*JmpIt);
          ++JmpIt;
        } while (JmpIt != MBB.instr_end() &&
                 X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
                     X86::COND_INVALID);
        break;
      }

      // Otherwise we can just rewrite in-place.
      if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
        rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
      } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
        rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
      } else if (MI.getOpcode() == TargetOpcode::COPY) {
        rewriteCopy(MI, *FlagUse, CopyDefI);
      } else {
        // We assume that arithmetic instructions that use flags also def them.
        assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
               "Expected a def of EFLAGS for this instruction!");

        // NB!!! Several arithmetic instructions only *partially* update
        // flags. Theoretically, we could generate MI code sequences that
        // would rely on this fact and observe different flags independently.
        // But currently LLVM models all of these instructions as clobbering
        // all the flags in an undef way. We rely on that to simplify the
        // logic.
        FlagsKilled = true;

        rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        break;
      }

      // If this was the last use of the flags, we're done.
      if (FlagsKilled)
        break;
    }

    // If we didn't find a kill (or equivalent), check that the flags don't
    // live out of the basic block. Currently we don't support lowering copies
    // of flags that live out in this fashion.
    if (!FlagsKilled &&
        llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
          return SuccMBB->isLiveIn(X86::EFLAGS);
        })) {
      DEBUG({
        dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
               << "----\n";
        MBB.dump();
        dbgs() << "----\n"
               << "ERROR: Cannot lower this EFLAGS copy!\n";
      });
      report_fatal_error(
          "Cannot lower EFLAGS copy that lives out of a basic block!");
    }

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps we'll have to do surgery on the CFG.
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump we need to split the blocks apart.
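      // A TEST is not a terminator, so it can't legally be inserted between
      // two branches; peeling each jump after the first into its own block
      // gives every conditional jump a spot for its own test. E.g.
      // `ja %bb.1; jb %bb.2` becomes `test; ja %bb.1` followed by a new
      // block containing `test; jb %bb.2`.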
      if (JmpI != JmpIs.front())
        splitBlock(*JmpI->getParent(), *JmpI, *TII);

      rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray
X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB,
                                             MachineInstr &CopyDefI) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI : llvm::reverse(
           llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) {
    X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
    if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
        TRI->isVirtualRegister(MI.getOperand(0).getReg()))
      CondRegs[Cond] = MI.getOperand(0).getReg();

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS))
      break;
  }
  return CondRegs;
}

unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond) {
  unsigned Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
                      TII->get(X86::getSETFromCond(Cond)), Reg);
  (void)SetI;
  DEBUG(dbgs() << "    save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}

std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          DebugLoc Loc, unsigned Reg) {
  // We emit test instructions as register/immediate test against -1. This
  // allows register allocation to fold a memory operand if needed (that will
  // happen often due to the places this code is emitted). But hopefully it
  // will also allow us to select a shorter encoding of `testb %reg, %reg`
  // when that would be equivalent.
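  //
  // Either way ZF is set exactly when the saved condition register is zero,
  // so consumers test for NE to observe the original condition (or E for its
  // inverse).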
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8ri)).addReg(Reg).addImm(-1);
  (void)TestI;
  DEBUG(dbgs() << "    test cond: "; TestI->dump());
  ++NumTestsInserted;
}

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF. Figure out which condition we need
  // to preserve in a register.
  X86::CondCode Cond;

  // The addend to use to reset CF or OF when added to the flag value.
  int Addend;

  switch (getMnemonicFromOpcode(MI.getOpcode())) {
  case FlagArithMnemonic::ADC:
  case FlagArithMnemonic::ADCX:
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend that when one is added will need a carry due to not
    // having a higher bit available. The saved condition register holds 0 or
    // 1, so 255 + 1 == 256 overflows 8 bits and sets CF, while 255 + 0 leaves
    // CF clear.
    Addend = 255;
    break;

  case FlagArithMnemonic::ADOX:
    Cond = X86::COND_O; // OF == 1
    // Set up an addend that when one is added will turn from positive to
    // negative and thus overflow in the signed domain: 127 + 1 == -128 as a
    // signed 8-bit value, setting OF, while 127 + 0 leaves OF clear.
    Addend = 127;
    break;
  }

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  MachineBasicBlock &MBB = *MI.getParent();

  // Insert an instruction that will set the flag back to the desired value.
  unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  DEBUG(dbgs() << "    add cond: "; AddI->dump());
  ++NumAddsInserted;
  FlagUse.setIsKill(true);
}

void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                           MachineBasicBlock::iterator TestPos,
                                           DebugLoc TestLoc,
                                           MachineInstr &CMovI,
                                           MachineOperand &FlagUse,
                                           CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &MBB = *CMovI.getParent();

  // Insert a direct test of the saved register.
  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);

  // Rewrite the CMov to use the !ZF flag from the test (but match register
  // size and memory operand), and then kill its use of the flags afterward.
  auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
  CMovI.setDesc(TII->get(X86::getCMovFromCond(
      Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
      !CMovI.memoperands_empty())));
  FlagUse.setIsKill(true);
  DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
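  // E.g. a `ja` becomes a TEST of the register saved for COND_A followed by
  // a `jne`, or by a `je` when only the inverse condition was available.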
  X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &JmpMBB = *JmpI.getParent();

  // Insert a direct test of the saved register.
  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);

  // Rewrite the jump to use the !ZF flag from the test, and kill its use of
  // flags afterward.
  JmpI.setDesc(TII->get(
      X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
  const int ImplicitEFLAGSOpIdx = 1;
  JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
  DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
                                           MachineOperand &FlagUse,
                                           MachineInstr &CopyDefI) {
  // Just replace this copy with the original copy def.
  MRI->replaceRegWith(MI.getOperand(0).getReg(),
                      CopyDefI.getOperand(0).getReg());
  MI.eraseFromParent();
}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
                                            MachineInstr &SetCCI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on any avoidable
  // duplicates having already been avoided before we get here.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!SetCCI.mayStore()) {
    assert(SetCCI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
    SetCCI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
                     SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(SetCCI.getOperand(i));

  MIB.addReg(CondReg);

  MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());

  SetCCI.eraseFromParent();
  return;
}