1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the pass which converts floating point instructions from 11 // pseudo registers into register stack instructions. This pass uses live 12 // variable information to indicate where the FPn registers are used and their 13 // lifetimes. 14 // 15 // The x87 hardware tracks liveness of the stack registers, so it is necessary 16 // to implement exact liveness tracking between basic blocks. The CFG edges are 17 // partitioned into bundles where the same FP registers must be live in 18 // identical stack positions. Instructions are inserted at the end of each basic 19 // block to rearrange the live registers to match the outgoing bundle. 20 // 21 // This approach avoids splitting critical edges at the potential cost of more 22 // live register shuffling instructions when critical edges are present. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "X86.h" 27 #include "X86InstrInfo.h" 28 #include "llvm/ADT/DepthFirstIterator.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/EdgeBundles.h" 35 #include "llvm/CodeGen/LivePhysRegs.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstrBuilder.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/CodeGen/Passes.h" 40 #include "llvm/CodeGen/TargetInstrInfo.h" 41 #include "llvm/CodeGen/TargetSubtargetInfo.h" 42 #include "llvm/Config/llvm-config.h" 43 #include "llvm/IR/InlineAsm.h" 44 #include "llvm/Support/Debug.h" 45 #include "llvm/Support/ErrorHandling.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include "llvm/Target/TargetMachine.h" 48 #include <algorithm> 49 #include <bitset> 50 using namespace llvm; 51 52 #define DEBUG_TYPE "x86-codegen" 53 54 STATISTIC(NumFXCH, "Number of fxch instructions inserted"); 55 STATISTIC(NumFP , "Number of floating point instructions"); 56 57 namespace { 58 const unsigned ScratchFPReg = 7; 59 60 struct FPS : public MachineFunctionPass { 61 static char ID; 62 FPS() : MachineFunctionPass(ID) { 63 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 64 // This is really only to keep valgrind quiet. 65 // The logic in isLive() is too much for it. 66 memset(Stack, 0, sizeof(Stack)); 67 memset(RegMap, 0, sizeof(RegMap)); 68 } 69 70 void getAnalysisUsage(AnalysisUsage &AU) const override { 71 AU.setPreservesCFG(); 72 AU.addRequired<EdgeBundles>(); 73 AU.addPreservedID(MachineLoopInfoID); 74 AU.addPreservedID(MachineDominatorsID); 75 MachineFunctionPass::getAnalysisUsage(AU); 76 } 77 78 bool runOnMachineFunction(MachineFunction &MF) override; 79 80 MachineFunctionProperties getRequiredProperties() const override { 81 return MachineFunctionProperties().set( 82 MachineFunctionProperties::Property::NoVRegs); 83 } 84 85 StringRef getPassName() const override { return "X86 FP Stackifier"; } 86 87 private: 88 const TargetInstrInfo *TII; // Machine instruction info. 89 90 // Two CFG edges are related if they leave the same block, or enter the same 91 // block. The transitive closure of an edge under this relation is a 92 // LiveBundle. It represents a set of CFG edges where the live FP stack 93 // registers must be allocated identically in the x87 stack. 94 // 95 // A LiveBundle is usually all the edges leaving a block, or all the edges 96 // entering a block, but it can contain more edges if critical edges are 97 // present. 98 // 99 // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 100 // but the exact mapping of FP registers to stack slots is fixed later. 101 struct LiveBundle { 102 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 103 unsigned Mask; 104 105 // Number of pre-assigned live registers in FixStack. This is 0 when the 106 // stack order has not yet been fixed. 107 unsigned FixCount; 108 109 // Assigned stack order for live-in registers. 110 // FixStack[i] == getStackEntry(i) for all i < FixCount. 111 unsigned char FixStack[8]; 112 113 LiveBundle() : Mask(0), FixCount(0) {} 114 115 // Have the live registers been assigned a stack order yet? 116 bool isFixed() const { return !Mask || FixCount; } 117 }; 118 119 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges 120 // with no live FP registers. 121 SmallVector<LiveBundle, 8> LiveBundles; 122 123 // The edge bundle analysis provides indices into the LiveBundles vector. 124 EdgeBundles *Bundles; 125 126 // Return a bitmask of FP registers in block's live-in list. 127 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) { 128 unsigned Mask = 0; 129 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(); 130 I != MBB->livein_end(); ) { 131 MCPhysReg Reg = I->PhysReg; 132 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums"); 133 if (Reg >= X86::FP0 && Reg <= X86::FP6) { 134 Mask |= 1 << (Reg - X86::FP0); 135 if (RemoveFPs) { 136 I = MBB->removeLiveIn(I); 137 continue; 138 } 139 } 140 ++I; 141 } 142 return Mask; 143 } 144 145 // Partition all the CFG edges into LiveBundles. 146 void bundleCFGRecomputeKillFlags(MachineFunction &MF); 147 148 MachineBasicBlock *MBB; // Current basic block 149 150 // The hardware keeps track of how many FP registers are live, so we have 151 // to model that exactly. Usually, each live register corresponds to an 152 // FP<n> register, but when dealing with calls, returns, and inline 153 // assembly, it is sometimes necessary to have live scratch registers. 154 unsigned Stack[8]; // FP<n> Registers in each stack slot... 155 unsigned StackTop; // The current top of the FP stack. 156 157 enum { 158 NumFPRegs = 8 // Including scratch pseudo-registers. 159 }; 160 161 // For each live FP<n> register, point to its Stack[] entry. 162 // The first entries correspond to FP0-FP6, the rest are scratch registers 163 // used when we need slightly different live registers than what the 164 // register allocator thinks. 165 unsigned RegMap[NumFPRegs]; 166 167 // Set up our stack model to match the incoming registers to MBB. 168 void setupBlockStack(); 169 170 // Shuffle live registers to match the expectations of successor blocks. 171 void finishBlockStack(); 172 173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 174 void dumpStack() const { 175 dbgs() << "Stack contents:"; 176 for (unsigned i = 0; i != StackTop; ++i) { 177 dbgs() << " FP" << Stack[i]; 178 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 179 } 180 } 181 #endif 182 183 /// getSlot - Return the stack slot number a particular register number is 184 /// in. 185 unsigned getSlot(unsigned RegNo) const { 186 assert(RegNo < NumFPRegs && "Regno out of range!"); 187 return RegMap[RegNo]; 188 } 189 190 /// isLive - Is RegNo currently live in the stack? 191 bool isLive(unsigned RegNo) const { 192 unsigned Slot = getSlot(RegNo); 193 return Slot < StackTop && Stack[Slot] == RegNo; 194 } 195 196 /// getStackEntry - Return the X86::FP<n> register in register ST(i). 197 unsigned getStackEntry(unsigned STi) const { 198 if (STi >= StackTop) 199 report_fatal_error("Access past stack top!"); 200 return Stack[StackTop-1-STi]; 201 } 202 203 /// getSTReg - Return the X86::ST(i) register which contains the specified 204 /// FP<RegNo> register. 205 unsigned getSTReg(unsigned RegNo) const { 206 return StackTop - 1 - getSlot(RegNo) + X86::ST0; 207 } 208 209 // pushReg - Push the specified FP<n> register onto the stack. 210 void pushReg(unsigned Reg) { 211 assert(Reg < NumFPRegs && "Register number out of range!"); 212 if (StackTop >= 8) 213 report_fatal_error("Stack overflow!"); 214 Stack[StackTop] = Reg; 215 RegMap[Reg] = StackTop++; 216 } 217 218 // popReg - Pop a register from the stack. 219 void popReg() { 220 if (StackTop == 0) 221 report_fatal_error("Cannot pop empty stack!"); 222 RegMap[Stack[--StackTop]] = ~0; // Update state 223 } 224 225 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 226 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 227 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 228 if (isAtTop(RegNo)) return; 229 230 unsigned STReg = getSTReg(RegNo); 231 unsigned RegOnTop = getStackEntry(0); 232 233 // Swap the slots the regs are in. 234 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 235 236 // Swap stack slot contents. 237 if (RegMap[RegOnTop] >= StackTop) 238 report_fatal_error("Access past stack top!"); 239 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 240 241 // Emit an fxch to update the runtime processors version of the state. 242 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 243 ++NumFXCH; 244 } 245 246 void duplicateToTop(unsigned RegNo, unsigned AsReg, 247 MachineBasicBlock::iterator I) { 248 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 249 unsigned STReg = getSTReg(RegNo); 250 pushReg(AsReg); // New register on top of stack 251 252 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 253 } 254 255 /// popStackAfter - Pop the current value off of the top of the FP stack 256 /// after the specified instruction. 257 void popStackAfter(MachineBasicBlock::iterator &I); 258 259 /// freeStackSlotAfter - Free the specified register from the register 260 /// stack, so that it is no longer in a register. If the register is 261 /// currently at the top of the stack, we just pop the current instruction, 262 /// otherwise we store the current top-of-stack into the specified slot, 263 /// then pop the top of stack. 264 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 265 266 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted 267 /// instruction. 268 MachineBasicBlock::iterator 269 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 270 271 /// Adjust the live registers to be the set in Mask. 272 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 273 274 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is 275 /// st(0), FP reg FixStack[1] is st(1) etc. 276 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 277 MachineBasicBlock::iterator I); 278 279 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 280 281 void handleCall(MachineBasicBlock::iterator &I); 282 void handleReturn(MachineBasicBlock::iterator &I); 283 void handleZeroArgFP(MachineBasicBlock::iterator &I); 284 void handleOneArgFP(MachineBasicBlock::iterator &I); 285 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 286 void handleTwoArgFP(MachineBasicBlock::iterator &I); 287 void handleCompareFP(MachineBasicBlock::iterator &I); 288 void handleCondMovFP(MachineBasicBlock::iterator &I); 289 void handleSpecialFP(MachineBasicBlock::iterator &I); 290 291 // Check if a COPY instruction is using FP registers. 292 static bool isFPCopy(MachineInstr &MI) { 293 unsigned DstReg = MI.getOperand(0).getReg(); 294 unsigned SrcReg = MI.getOperand(1).getReg(); 295 296 return X86::RFP80RegClass.contains(DstReg) || 297 X86::RFP80RegClass.contains(SrcReg); 298 } 299 300 void setKillFlags(MachineBasicBlock &MBB) const; 301 }; 302 char FPS::ID = 0; 303 } 304 305 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 306 307 /// getFPReg - Return the X86::FPx register number for the specified operand. 308 /// For example, this returns 3 for X86::FP3. 309 static unsigned getFPReg(const MachineOperand &MO) { 310 assert(MO.isReg() && "Expected an FP register!"); 311 unsigned Reg = MO.getReg(); 312 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 313 return Reg - X86::FP0; 314 } 315 316 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 317 /// register references into FP stack references. 318 /// 319 bool FPS::runOnMachineFunction(MachineFunction &MF) { 320 // We only need to run this pass if there are any FP registers used in this 321 // function. If it is all integer, there is nothing for us to do! 322 bool FPIsUsed = false; 323 324 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); 325 const MachineRegisterInfo &MRI = MF.getRegInfo(); 326 for (unsigned i = 0; i <= 6; ++i) 327 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) { 328 FPIsUsed = true; 329 break; 330 } 331 332 // Early exit. 333 if (!FPIsUsed) return false; 334 335 Bundles = &getAnalysis<EdgeBundles>(); 336 TII = MF.getSubtarget().getInstrInfo(); 337 338 // Prepare cross-MBB liveness. 339 bundleCFGRecomputeKillFlags(MF); 340 341 StackTop = 0; 342 343 // Process the function in depth first order so that we process at least one 344 // of the predecessors for every reachable block in the function. 345 df_iterator_default_set<MachineBasicBlock*> Processed; 346 MachineBasicBlock *Entry = &MF.front(); 347 348 LiveBundle &Bundle = 349 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)]; 350 351 // In regcall convention, some FP registers may not be passed through 352 // the stack, so they will need to be assigned to the stack first 353 if ((Entry->getParent()->getFunction().getCallingConv() == 354 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) { 355 // In the register calling convention, up to one FP argument could be 356 // saved in the first FP register. 357 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means 358 // that the FP registers contain arguments. 359 // The actual value is passed in FP0. 360 // Here we fix the stack and mark FP0 as pre-assigned register. 361 assert((Bundle.Mask & 0xFE) == 0 && 362 "Only FP0 could be passed as an argument"); 363 Bundle.FixCount = 1; 364 Bundle.FixStack[0] = 0; 365 } 366 367 bool Changed = false; 368 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed)) 369 Changed |= processBasicBlock(MF, *BB); 370 371 // Process any unreachable blocks in arbitrary order now. 372 if (MF.size() != Processed.size()) 373 for (MachineBasicBlock &BB : MF) 374 if (Processed.insert(&BB).second) 375 Changed |= processBasicBlock(MF, BB); 376 377 LiveBundles.clear(); 378 379 return Changed; 380 } 381 382 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and 383 /// live-out sets for the FP registers. Consistent means that the set of 384 /// registers live-out from a block is identical to the live-in set of all 385 /// successors. This is not enforced by the normal live-in lists since 386 /// registers may be implicitly defined, or not used by all successors. 387 void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) { 388 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 389 LiveBundles.resize(Bundles->getNumBundles()); 390 391 // Gather the actual live-in masks for all MBBs. 392 for (MachineBasicBlock &MBB : MF) { 393 setKillFlags(MBB); 394 395 const unsigned Mask = calcLiveInMask(&MBB, false); 396 if (!Mask) 397 continue; 398 // Update MBB ingoing bundle mask. 399 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask; 400 } 401 } 402 403 /// processBasicBlock - Loop over all of the instructions in the basic block, 404 /// transforming FP instructions into their stack form. 405 /// 406 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 407 bool Changed = false; 408 MBB = &BB; 409 410 setupBlockStack(); 411 412 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 413 MachineInstr &MI = *I; 414 uint64_t Flags = MI.getDesc().TSFlags; 415 416 unsigned FPInstClass = Flags & X86II::FPTypeMask; 417 if (MI.isInlineAsm()) 418 FPInstClass = X86II::SpecialFP; 419 420 if (MI.isCopy() && isFPCopy(MI)) 421 FPInstClass = X86II::SpecialFP; 422 423 if (MI.isImplicitDef() && 424 X86::RFP80RegClass.contains(MI.getOperand(0).getReg())) 425 FPInstClass = X86II::SpecialFP; 426 427 if (MI.isCall()) 428 FPInstClass = X86II::SpecialFP; 429 430 if (FPInstClass == X86II::NotFP) 431 continue; // Efficiently ignore non-fp insts! 432 433 MachineInstr *PrevMI = nullptr; 434 if (I != BB.begin()) 435 PrevMI = &*std::prev(I); 436 437 ++NumFP; // Keep track of # of pseudo instrs 438 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI); 439 440 // Get dead variables list now because the MI pointer may be deleted as part 441 // of processing! 442 SmallVector<unsigned, 8> DeadRegs; 443 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 444 const MachineOperand &MO = MI.getOperand(i); 445 if (MO.isReg() && MO.isDead()) 446 DeadRegs.push_back(MO.getReg()); 447 } 448 449 switch (FPInstClass) { 450 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 451 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 452 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 453 case X86II::TwoArgFP: handleTwoArgFP(I); break; 454 case X86II::CompareFP: handleCompareFP(I); break; 455 case X86II::CondMovFP: handleCondMovFP(I); break; 456 case X86II::SpecialFP: handleSpecialFP(I); break; 457 default: llvm_unreachable("Unknown FP Type!"); 458 } 459 460 // Check to see if any of the values defined by this instruction are dead 461 // after definition. If so, pop them. 462 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 463 unsigned Reg = DeadRegs[i]; 464 // Check if Reg is live on the stack. An inline-asm register operand that 465 // is in the clobber list and marked dead might not be live on the stack. 466 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers"); 467 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) { 468 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n"); 469 freeStackSlotAfter(I, Reg-X86::FP0); 470 } 471 } 472 473 // Print out all of the instructions expanded to if -debug 474 LLVM_DEBUG({ 475 MachineBasicBlock::iterator PrevI = PrevMI; 476 if (I == PrevI) { 477 dbgs() << "Just deleted pseudo instruction\n"; 478 } else { 479 MachineBasicBlock::iterator Start = I; 480 // Rewind to first instruction newly inserted. 481 while (Start != BB.begin() && std::prev(Start) != PrevI) 482 --Start; 483 dbgs() << "Inserted instructions:\n\t"; 484 Start->print(dbgs()); 485 while (++Start != std::next(I)) { 486 } 487 } 488 dumpStack(); 489 }); 490 (void)PrevMI; 491 492 Changed = true; 493 } 494 495 finishBlockStack(); 496 497 return Changed; 498 } 499 500 /// setupBlockStack - Use the live bundles to set up our model of the stack 501 /// to match predecessors' live out stack. 502 void FPS::setupBlockStack() { 503 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB) 504 << " derived from " << MBB->getName() << ".\n"); 505 StackTop = 0; 506 // Get the live-in bundle for MBB. 507 const LiveBundle &Bundle = 508 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; 509 510 if (!Bundle.Mask) { 511 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n"); 512 return; 513 } 514 515 // Depth-first iteration should ensure that we always have an assigned stack. 516 assert(Bundle.isFixed() && "Reached block before any predecessors"); 517 518 // Push the fixed live-in registers. 519 for (unsigned i = Bundle.FixCount; i > 0; --i) { 520 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp" 521 << unsigned(Bundle.FixStack[i - 1]) << '\n'); 522 pushReg(Bundle.FixStack[i-1]); 523 } 524 525 // Kill off unwanted live-ins. This can happen with a critical edge. 526 // FIXME: We could keep these live registers around as zombies. They may need 527 // to be revived at the end of a short block. It might save a few instrs. 528 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true); 529 adjustLiveRegs(Mask, MBB->begin()); 530 LLVM_DEBUG(MBB->dump()); 531 } 532 533 /// finishBlockStack - Revive live-outs that are implicitly defined out of 534 /// MBB. Shuffle live registers to match the expected fixed stack of any 535 /// predecessors, and ensure that all predecessors are expecting the same 536 /// stack. 537 void FPS::finishBlockStack() { 538 // The RET handling below takes care of return blocks for us. 539 if (MBB->succ_empty()) 540 return; 541 542 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB) 543 << " derived from " << MBB->getName() << ".\n"); 544 545 // Get MBB's live-out bundle. 546 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true); 547 LiveBundle &Bundle = LiveBundles[BundleIdx]; 548 549 // We may need to kill and define some registers to match successors. 550 // FIXME: This can probably be combined with the shuffle below. 551 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 552 adjustLiveRegs(Bundle.Mask, Term); 553 554 if (!Bundle.Mask) { 555 LLVM_DEBUG(dbgs() << "No live-outs.\n"); 556 return; 557 } 558 559 // Has the stack order been fixed yet? 560 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 561 if (Bundle.isFixed()) { 562 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n"); 563 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 564 } else { 565 // Not fixed yet, we get to choose. 566 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n"); 567 Bundle.FixCount = StackTop; 568 for (unsigned i = 0; i < StackTop; ++i) 569 Bundle.FixStack[i] = getStackEntry(i); 570 } 571 } 572 573 574 //===----------------------------------------------------------------------===// 575 // Efficient Lookup Table Support 576 //===----------------------------------------------------------------------===// 577 578 namespace { 579 struct TableEntry { 580 uint16_t from; 581 uint16_t to; 582 bool operator<(const TableEntry &TE) const { return from < TE.from; } 583 friend bool operator<(const TableEntry &TE, unsigned V) { 584 return TE.from < V; 585 } 586 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, 587 const TableEntry &TE) { 588 return V < TE.from; 589 } 590 }; 591 } 592 593 static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) { 594 const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode); 595 if (I != Table.end() && I->from == Opcode) 596 return I->to; 597 return -1; 598 } 599 600 #ifdef NDEBUG 601 #define ASSERT_SORTED(TABLE) 602 #else 603 #define ASSERT_SORTED(TABLE) \ 604 { \ 605 static std::atomic<bool> TABLE##Checked(false); \ 606 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \ 607 assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \ 608 "All lookup tables must be sorted for efficient access!"); \ 609 TABLE##Checked.store(true, std::memory_order_relaxed); \ 610 } \ 611 } 612 #endif 613 614 //===----------------------------------------------------------------------===// 615 // Register File -> Register Stack Mapping Methods 616 //===----------------------------------------------------------------------===// 617 618 // OpcodeTable - Sorted map of register instructions to their stack version. 619 // The first element is an register file pseudo instruction, the second is the 620 // concrete X86 instruction which uses the register stack. 621 // 622 static const TableEntry OpcodeTable[] = { 623 { X86::ABS_Fp32 , X86::ABS_F }, 624 { X86::ABS_Fp64 , X86::ABS_F }, 625 { X86::ABS_Fp80 , X86::ABS_F }, 626 { X86::ADD_Fp32m , X86::ADD_F32m }, 627 { X86::ADD_Fp64m , X86::ADD_F64m }, 628 { X86::ADD_Fp64m32 , X86::ADD_F32m }, 629 { X86::ADD_Fp80m32 , X86::ADD_F32m }, 630 { X86::ADD_Fp80m64 , X86::ADD_F64m }, 631 { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 632 { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 633 { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 634 { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 635 { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 636 { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 637 { X86::CHS_Fp32 , X86::CHS_F }, 638 { X86::CHS_Fp64 , X86::CHS_F }, 639 { X86::CHS_Fp80 , X86::CHS_F }, 640 { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 641 { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 642 { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 643 { X86::CMOVB_Fp32 , X86::CMOVB_F }, 644 { X86::CMOVB_Fp64 , X86::CMOVB_F }, 645 { X86::CMOVB_Fp80 , X86::CMOVB_F }, 646 { X86::CMOVE_Fp32 , X86::CMOVE_F }, 647 { X86::CMOVE_Fp64 , X86::CMOVE_F }, 648 { X86::CMOVE_Fp80 , X86::CMOVE_F }, 649 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 650 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 651 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 652 { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 653 { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 654 { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 655 { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 656 { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 657 { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 658 { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 659 { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 660 { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 661 { X86::CMOVP_Fp32 , X86::CMOVP_F }, 662 { X86::CMOVP_Fp64 , X86::CMOVP_F }, 663 { X86::CMOVP_Fp80 , X86::CMOVP_F }, 664 { X86::COS_Fp32 , X86::COS_F }, 665 { X86::COS_Fp64 , X86::COS_F }, 666 { X86::COS_Fp80 , X86::COS_F }, 667 { X86::DIVR_Fp32m , X86::DIVR_F32m }, 668 { X86::DIVR_Fp64m , X86::DIVR_F64m }, 669 { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 670 { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 671 { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 672 { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 673 { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 674 { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 675 { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 676 { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 677 { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 678 { X86::DIV_Fp32m , X86::DIV_F32m }, 679 { X86::DIV_Fp64m , X86::DIV_F64m }, 680 { X86::DIV_Fp64m32 , X86::DIV_F32m }, 681 { X86::DIV_Fp80m32 , X86::DIV_F32m }, 682 { X86::DIV_Fp80m64 , X86::DIV_F64m }, 683 { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 684 { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 685 { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 686 { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 687 { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 688 { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 689 { X86::ILD_Fp16m32 , X86::ILD_F16m }, 690 { X86::ILD_Fp16m64 , X86::ILD_F16m }, 691 { X86::ILD_Fp16m80 , X86::ILD_F16m }, 692 { X86::ILD_Fp32m32 , X86::ILD_F32m }, 693 { X86::ILD_Fp32m64 , X86::ILD_F32m }, 694 { X86::ILD_Fp32m80 , X86::ILD_F32m }, 695 { X86::ILD_Fp64m32 , X86::ILD_F64m }, 696 { X86::ILD_Fp64m64 , X86::ILD_F64m }, 697 { X86::ILD_Fp64m80 , X86::ILD_F64m }, 698 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 699 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 700 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 701 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 702 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 703 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 704 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 705 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 706 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 707 { X86::IST_Fp16m32 , X86::IST_F16m }, 708 { X86::IST_Fp16m64 , X86::IST_F16m }, 709 { X86::IST_Fp16m80 , X86::IST_F16m }, 710 { X86::IST_Fp32m32 , X86::IST_F32m }, 711 { X86::IST_Fp32m64 , X86::IST_F32m }, 712 { X86::IST_Fp32m80 , X86::IST_F32m }, 713 { X86::IST_Fp64m32 , X86::IST_FP64m }, 714 { X86::IST_Fp64m64 , X86::IST_FP64m }, 715 { X86::IST_Fp64m80 , X86::IST_FP64m }, 716 { X86::LD_Fp032 , X86::LD_F0 }, 717 { X86::LD_Fp064 , X86::LD_F0 }, 718 { X86::LD_Fp080 , X86::LD_F0 }, 719 { X86::LD_Fp132 , X86::LD_F1 }, 720 { X86::LD_Fp164 , X86::LD_F1 }, 721 { X86::LD_Fp180 , X86::LD_F1 }, 722 { X86::LD_Fp32m , X86::LD_F32m }, 723 { X86::LD_Fp32m64 , X86::LD_F32m }, 724 { X86::LD_Fp32m80 , X86::LD_F32m }, 725 { X86::LD_Fp64m , X86::LD_F64m }, 726 { X86::LD_Fp64m80 , X86::LD_F64m }, 727 { X86::LD_Fp80m , X86::LD_F80m }, 728 { X86::MUL_Fp32m , X86::MUL_F32m }, 729 { X86::MUL_Fp64m , X86::MUL_F64m }, 730 { X86::MUL_Fp64m32 , X86::MUL_F32m }, 731 { X86::MUL_Fp80m32 , X86::MUL_F32m }, 732 { X86::MUL_Fp80m64 , X86::MUL_F64m }, 733 { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 734 { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 735 { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 736 { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 737 { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 738 { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 739 { X86::SIN_Fp32 , X86::SIN_F }, 740 { X86::SIN_Fp64 , X86::SIN_F }, 741 { X86::SIN_Fp80 , X86::SIN_F }, 742 { X86::SQRT_Fp32 , X86::SQRT_F }, 743 { X86::SQRT_Fp64 , X86::SQRT_F }, 744 { X86::SQRT_Fp80 , X86::SQRT_F }, 745 { X86::ST_Fp32m , X86::ST_F32m }, 746 { X86::ST_Fp64m , X86::ST_F64m }, 747 { X86::ST_Fp64m32 , X86::ST_F32m }, 748 { X86::ST_Fp80m32 , X86::ST_F32m }, 749 { X86::ST_Fp80m64 , X86::ST_F64m }, 750 { X86::ST_FpP80m , X86::ST_FP80m }, 751 { X86::SUBR_Fp32m , X86::SUBR_F32m }, 752 { X86::SUBR_Fp64m , X86::SUBR_F64m }, 753 { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 754 { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 755 { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 756 { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 757 { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 758 { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 759 { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 760 { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 761 { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 762 { X86::SUB_Fp32m , X86::SUB_F32m }, 763 { X86::SUB_Fp64m , X86::SUB_F64m }, 764 { X86::SUB_Fp64m32 , X86::SUB_F32m }, 765 { X86::SUB_Fp80m32 , X86::SUB_F32m }, 766 { X86::SUB_Fp80m64 , X86::SUB_F64m }, 767 { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 768 { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 769 { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 770 { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 771 { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 772 { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 773 { X86::TST_Fp32 , X86::TST_F }, 774 { X86::TST_Fp64 , X86::TST_F }, 775 { X86::TST_Fp80 , X86::TST_F }, 776 { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 777 { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 778 { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 779 { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 780 { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 781 { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 782 }; 783 784 static unsigned getConcreteOpcode(unsigned Opcode) { 785 ASSERT_SORTED(OpcodeTable); 786 int Opc = Lookup(OpcodeTable, Opcode); 787 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 788 return Opc; 789 } 790 791 //===----------------------------------------------------------------------===// 792 // Helper Methods 793 //===----------------------------------------------------------------------===// 794 795 // PopTable - Sorted map of instructions to their popping version. The first 796 // element is an instruction, the second is the version which pops. 797 // 798 static const TableEntry PopTable[] = { 799 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 800 801 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 802 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 803 804 { X86::IST_F16m , X86::IST_FP16m }, 805 { X86::IST_F32m , X86::IST_FP32m }, 806 807 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 808 809 { X86::ST_F32m , X86::ST_FP32m }, 810 { X86::ST_F64m , X86::ST_FP64m }, 811 { X86::ST_Frr , X86::ST_FPrr }, 812 813 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 814 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 815 816 { X86::UCOM_FIr , X86::UCOM_FIPr }, 817 818 { X86::UCOM_FPr , X86::UCOM_FPPr }, 819 { X86::UCOM_Fr , X86::UCOM_FPr }, 820 }; 821 822 /// popStackAfter - Pop the current value off of the top of the FP stack after 823 /// the specified instruction. This attempts to be sneaky and combine the pop 824 /// into the instruction itself if possible. The iterator is left pointing to 825 /// the last instruction, be it a new pop instruction inserted, or the old 826 /// instruction if it was modified in place. 827 /// 828 void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 829 MachineInstr &MI = *I; 830 const DebugLoc &dl = MI.getDebugLoc(); 831 ASSERT_SORTED(PopTable); 832 833 popReg(); 834 835 // Check to see if there is a popping version of this instruction... 836 int Opcode = Lookup(PopTable, I->getOpcode()); 837 if (Opcode != -1) { 838 I->setDesc(TII->get(Opcode)); 839 if (Opcode == X86::UCOM_FPPr) 840 I->RemoveOperand(0); 841 } else { // Insert an explicit pop 842 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 843 } 844 } 845 846 /// freeStackSlotAfter - Free the specified register from the register stack, so 847 /// that it is no longer in a register. If the register is currently at the top 848 /// of the stack, we just pop the current instruction, otherwise we store the 849 /// current top-of-stack into the specified slot, then pop the top of stack. 850 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 851 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 852 popStackAfter(I); 853 return; 854 } 855 856 // Otherwise, store the top of stack into the dead slot, killing the operand 857 // without having to add in an explicit xchg then pop. 858 // 859 I = freeStackSlotBefore(++I, FPRegNo); 860 } 861 862 /// freeStackSlotBefore - Free the specified register without trying any 863 /// folding. 864 MachineBasicBlock::iterator 865 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 866 unsigned STReg = getSTReg(FPRegNo); 867 unsigned OldSlot = getSlot(FPRegNo); 868 unsigned TopReg = Stack[StackTop-1]; 869 Stack[OldSlot] = TopReg; 870 RegMap[TopReg] = OldSlot; 871 RegMap[FPRegNo] = ~0; 872 Stack[--StackTop] = ~0; 873 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)) 874 .addReg(STReg) 875 .getInstr(); 876 } 877 878 /// adjustLiveRegs - Kill and revive registers such that exactly the FP 879 /// registers with a bit in Mask are live. 880 void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 881 unsigned Defs = Mask; 882 unsigned Kills = 0; 883 for (unsigned i = 0; i < StackTop; ++i) { 884 unsigned RegNo = Stack[i]; 885 if (!(Defs & (1 << RegNo))) 886 // This register is live, but we don't want it. 887 Kills |= (1 << RegNo); 888 else 889 // We don't need to imp-def this live register. 890 Defs &= ~(1 << RegNo); 891 } 892 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 893 894 // Produce implicit-defs for free by using killed registers. 895 while (Kills && Defs) { 896 unsigned KReg = countTrailingZeros(Kills); 897 unsigned DReg = countTrailingZeros(Defs); 898 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg 899 << "\n"); 900 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 901 std::swap(RegMap[KReg], RegMap[DReg]); 902 Kills &= ~(1 << KReg); 903 Defs &= ~(1 << DReg); 904 } 905 906 // Kill registers by popping. 907 if (Kills && I != MBB->begin()) { 908 MachineBasicBlock::iterator I2 = std::prev(I); 909 while (StackTop) { 910 unsigned KReg = getStackEntry(0); 911 if (!(Kills & (1 << KReg))) 912 break; 913 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n"); 914 popStackAfter(I2); 915 Kills &= ~(1 << KReg); 916 } 917 } 918 919 // Manually kill the rest. 920 while (Kills) { 921 unsigned KReg = countTrailingZeros(Kills); 922 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n"); 923 freeStackSlotBefore(I, KReg); 924 Kills &= ~(1 << KReg); 925 } 926 927 // Load zeros for all the imp-defs. 928 while(Defs) { 929 unsigned DReg = countTrailingZeros(Defs); 930 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n"); 931 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 932 pushReg(DReg); 933 Defs &= ~(1 << DReg); 934 } 935 936 // Now we should have the correct registers live. 937 LLVM_DEBUG(dumpStack()); 938 assert(StackTop == countPopulation(Mask) && "Live count mismatch"); 939 } 940 941 /// shuffleStackTop - emit fxch instructions before I to shuffle the top 942 /// FixCount entries into the order given by FixStack. 943 /// FIXME: Is there a better algorithm than insertion sort? 944 void FPS::shuffleStackTop(const unsigned char *FixStack, 945 unsigned FixCount, 946 MachineBasicBlock::iterator I) { 947 // Move items into place, starting from the desired stack bottom. 948 while (FixCount--) { 949 // Old register at position FixCount. 950 unsigned OldReg = getStackEntry(FixCount); 951 // Desired register at position FixCount. 952 unsigned Reg = FixStack[FixCount]; 953 if (Reg == OldReg) 954 continue; 955 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 956 moveToTop(Reg, I); 957 if (FixCount > 0) 958 moveToTop(OldReg, I); 959 } 960 LLVM_DEBUG(dumpStack()); 961 } 962 963 964 //===----------------------------------------------------------------------===// 965 // Instruction transformation implementation 966 //===----------------------------------------------------------------------===// 967 968 void FPS::handleCall(MachineBasicBlock::iterator &I) { 969 unsigned STReturns = 0; 970 const MachineFunction* MF = I->getParent()->getParent(); 971 972 for (const auto &MO : I->operands()) { 973 if (!MO.isReg()) 974 continue; 975 976 unsigned R = MO.getReg() - X86::FP0; 977 978 if (R < 8) { 979 if (MF->getFunction().getCallingConv() != CallingConv::X86_RegCall) { 980 assert(MO.isDef() && MO.isImplicit()); 981 } 982 983 STReturns |= 1 << R; 984 } 985 } 986 987 unsigned N = countTrailingOnes(STReturns); 988 989 // FP registers used for function return must be consecutive starting at 990 // FP0 991 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); 992 993 // Reset the FP Stack - It is required because of possible leftovers from 994 // passed arguments. The caller should assume that the FP stack is 995 // returned empty (unless the callee returns values on FP stack). 996 while (StackTop > 0) 997 popReg(); 998 999 for (unsigned I = 0; I < N; ++I) 1000 pushReg(N - I - 1); 1001 } 1002 1003 /// If RET has an FP register use operand, pass the first one in ST(0) and 1004 /// the second one in ST(1). 1005 void FPS::handleReturn(MachineBasicBlock::iterator &I) { 1006 MachineInstr &MI = *I; 1007 1008 // Find the register operands. 1009 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1010 unsigned LiveMask = 0; 1011 1012 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1013 MachineOperand &Op = MI.getOperand(i); 1014 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1015 continue; 1016 // FP Register uses must be kills unless there are two uses of the same 1017 // register, in which case only one will be a kill. 1018 assert(Op.isUse() && 1019 (Op.isKill() || // Marked kill. 1020 getFPReg(Op) == FirstFPRegOp || // Second instance. 1021 MI.killsRegister(Op.getReg())) && // Later use is marked kill. 1022 "Ret only defs operands, and values aren't live beyond it"); 1023 1024 if (FirstFPRegOp == ~0U) 1025 FirstFPRegOp = getFPReg(Op); 1026 else { 1027 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1028 SecondFPRegOp = getFPReg(Op); 1029 } 1030 LiveMask |= (1 << getFPReg(Op)); 1031 1032 // Remove the operand so that later passes don't see it. 1033 MI.RemoveOperand(i); 1034 --i; 1035 --e; 1036 } 1037 1038 // We may have been carrying spurious live-ins, so make sure only the 1039 // returned registers are left live. 1040 adjustLiveRegs(LiveMask, MI); 1041 if (!LiveMask) return; // Quick check to see if any are possible. 1042 1043 // There are only four possibilities here: 1044 // 1) we are returning a single FP value. In this case, it has to be in 1045 // ST(0) already, so just declare success by removing the value from the 1046 // FP Stack. 1047 if (SecondFPRegOp == ~0U) { 1048 // Assert that the top of stack contains the right FP register. 1049 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1050 "Top of stack not the right register for RET!"); 1051 1052 // Ok, everything is good, mark the value as not being on the stack 1053 // anymore so that our assertion about the stack being empty at end of 1054 // block doesn't fire. 1055 StackTop = 0; 1056 return; 1057 } 1058 1059 // Otherwise, we are returning two values: 1060 // 2) If returning the same value for both, we only have one thing in the FP 1061 // stack. Consider: RET FP1, FP1 1062 if (StackTop == 1) { 1063 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1064 "Stack misconfiguration for RET!"); 1065 1066 // Duplicate the TOS so that we return it twice. Just pick some other FPx 1067 // register to hold it. 1068 unsigned NewReg = ScratchFPReg; 1069 duplicateToTop(FirstFPRegOp, NewReg, MI); 1070 FirstFPRegOp = NewReg; 1071 } 1072 1073 /// Okay we know we have two different FPx operands now: 1074 assert(StackTop == 2 && "Must have two values live!"); 1075 1076 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1077 /// in ST(1). In this case, emit an fxch. 1078 if (getStackEntry(0) == SecondFPRegOp) { 1079 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1080 moveToTop(FirstFPRegOp, MI); 1081 } 1082 1083 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1084 /// ST(1). Just remove both from our understanding of the stack and return. 1085 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1086 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1087 StackTop = 0; 1088 } 1089 1090 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 1091 /// 1092 void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 1093 MachineInstr &MI = *I; 1094 unsigned DestReg = getFPReg(MI.getOperand(0)); 1095 1096 // Change from the pseudo instruction to the concrete instruction. 1097 MI.RemoveOperand(0); // Remove the explicit ST(0) operand 1098 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); 1099 1100 // Result gets pushed on the stack. 1101 pushReg(DestReg); 1102 } 1103 1104 /// handleOneArgFP - fst <mem>, ST(0) 1105 /// 1106 void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 1107 MachineInstr &MI = *I; 1108 unsigned NumOps = MI.getDesc().getNumOperands(); 1109 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 1110 "Can only handle fst* & ftst instructions!"); 1111 1112 // Is this the last use of the source register? 1113 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1)); 1114 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg); 1115 1116 // FISTP64m is strange because there isn't a non-popping versions. 1117 // If we have one _and_ we don't want to pop the operand, duplicate the value 1118 // on the stack instead of moving it. This ensure that popping the value is 1119 // always ok. 1120 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 1121 // 1122 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 || 1123 MI.getOpcode() == X86::ISTT_Fp16m32 || 1124 MI.getOpcode() == X86::ISTT_Fp32m32 || 1125 MI.getOpcode() == X86::ISTT_Fp64m32 || 1126 MI.getOpcode() == X86::IST_Fp64m64 || 1127 MI.getOpcode() == X86::ISTT_Fp16m64 || 1128 MI.getOpcode() == X86::ISTT_Fp32m64 || 1129 MI.getOpcode() == X86::ISTT_Fp64m64 || 1130 MI.getOpcode() == X86::IST_Fp64m80 || 1131 MI.getOpcode() == X86::ISTT_Fp16m80 || 1132 MI.getOpcode() == X86::ISTT_Fp32m80 || 1133 MI.getOpcode() == X86::ISTT_Fp64m80 || 1134 MI.getOpcode() == X86::ST_FpP80m)) { 1135 duplicateToTop(Reg, ScratchFPReg, I); 1136 } else { 1137 moveToTop(Reg, I); // Move to the top of the stack... 1138 } 1139 1140 // Convert from the pseudo instruction to the concrete instruction. 1141 MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand 1142 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); 1143 1144 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m || 1145 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m || 1146 MI.getOpcode() == X86::ST_FP80m) { 1147 if (StackTop == 0) 1148 report_fatal_error("Stack empty??"); 1149 --StackTop; 1150 } else if (KillsSrc) { // Last use of operand? 1151 popStackAfter(I); 1152 } 1153 } 1154 1155 1156 /// handleOneArgFPRW: Handle instructions that read from the top of stack and 1157 /// replace the value with a newly computed value. These instructions may have 1158 /// non-fp operands after their FP operands. 1159 /// 1160 /// Examples: 1161 /// R1 = fchs R2 1162 /// R1 = fadd R2, [mem] 1163 /// 1164 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 1165 MachineInstr &MI = *I; 1166 #ifndef NDEBUG 1167 unsigned NumOps = MI.getDesc().getNumOperands(); 1168 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 1169 #endif 1170 1171 // Is this the last use of the source register? 1172 unsigned Reg = getFPReg(MI.getOperand(1)); 1173 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg); 1174 1175 if (KillsSrc) { 1176 // If this is the last use of the source register, just make sure it's on 1177 // the top of the stack. 1178 moveToTop(Reg, I); 1179 if (StackTop == 0) 1180 report_fatal_error("Stack cannot be empty!"); 1181 --StackTop; 1182 pushReg(getFPReg(MI.getOperand(0))); 1183 } else { 1184 // If this is not the last use of the source register, _copy_ it to the top 1185 // of the stack. 1186 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I); 1187 } 1188 1189 // Change from the pseudo instruction to the concrete instruction. 1190 MI.RemoveOperand(1); // Drop the source operand. 1191 MI.RemoveOperand(0); // Drop the destination operand. 1192 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); 1193 } 1194 1195 1196 //===----------------------------------------------------------------------===// 1197 // Define tables of various ways to map pseudo instructions 1198 // 1199 1200 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 1201 static const TableEntry ForwardST0Table[] = { 1202 { X86::ADD_Fp32 , X86::ADD_FST0r }, 1203 { X86::ADD_Fp64 , X86::ADD_FST0r }, 1204 { X86::ADD_Fp80 , X86::ADD_FST0r }, 1205 { X86::DIV_Fp32 , X86::DIV_FST0r }, 1206 { X86::DIV_Fp64 , X86::DIV_FST0r }, 1207 { X86::DIV_Fp80 , X86::DIV_FST0r }, 1208 { X86::MUL_Fp32 , X86::MUL_FST0r }, 1209 { X86::MUL_Fp64 , X86::MUL_FST0r }, 1210 { X86::MUL_Fp80 , X86::MUL_FST0r }, 1211 { X86::SUB_Fp32 , X86::SUB_FST0r }, 1212 { X86::SUB_Fp64 , X86::SUB_FST0r }, 1213 { X86::SUB_Fp80 , X86::SUB_FST0r }, 1214 }; 1215 1216 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 1217 static const TableEntry ReverseST0Table[] = { 1218 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 1219 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 1220 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 1221 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 1222 { X86::DIV_Fp64 , X86::DIVR_FST0r }, 1223 { X86::DIV_Fp80 , X86::DIVR_FST0r }, 1224 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 1225 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 1226 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 1227 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 1228 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 1229 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 1230 }; 1231 1232 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 1233 static const TableEntry ForwardSTiTable[] = { 1234 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 1235 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 1236 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 1237 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 1238 { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 1239 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 1240 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 1241 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 1242 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 1243 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 1244 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 1245 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 1246 }; 1247 1248 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 1249 static const TableEntry ReverseSTiTable[] = { 1250 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 1251 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 1252 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 1253 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 1254 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 1255 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 1256 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 1257 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 1258 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 1259 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 1260 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 1261 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 1262 }; 1263 1264 1265 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 1266 /// instructions which need to be simplified and possibly transformed. 1267 /// 1268 /// Result: ST(0) = fsub ST(0), ST(i) 1269 /// ST(i) = fsub ST(0), ST(i) 1270 /// ST(0) = fsubr ST(0), ST(i) 1271 /// ST(i) = fsubr ST(0), ST(i) 1272 /// 1273 void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1274 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1275 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1276 MachineInstr &MI = *I; 1277 1278 unsigned NumOperands = MI.getDesc().getNumOperands(); 1279 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1280 unsigned Dest = getFPReg(MI.getOperand(0)); 1281 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2)); 1282 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1)); 1283 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0); 1284 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1); 1285 DebugLoc dl = MI.getDebugLoc(); 1286 1287 unsigned TOS = getStackEntry(0); 1288 1289 // One of our operands must be on the top of the stack. If neither is yet, we 1290 // need to move one. 1291 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1292 // We can choose to move either operand to the top of the stack. If one of 1293 // the operands is killed by this instruction, we want that one so that we 1294 // can update right on top of the old version. 1295 if (KillsOp0) { 1296 moveToTop(Op0, I); // Move dead operand to TOS. 1297 TOS = Op0; 1298 } else if (KillsOp1) { 1299 moveToTop(Op1, I); 1300 TOS = Op1; 1301 } else { 1302 // All of the operands are live after this instruction executes, so we 1303 // cannot update on top of any operand. Because of this, we must 1304 // duplicate one of the stack elements to the top. It doesn't matter 1305 // which one we pick. 1306 // 1307 duplicateToTop(Op0, Dest, I); 1308 Op0 = TOS = Dest; 1309 KillsOp0 = true; 1310 } 1311 } else if (!KillsOp0 && !KillsOp1) { 1312 // If we DO have one of our operands at the top of the stack, but we don't 1313 // have a dead operand, we must duplicate one of the operands to a new slot 1314 // on the stack. 1315 duplicateToTop(Op0, Dest, I); 1316 Op0 = TOS = Dest; 1317 KillsOp0 = true; 1318 } 1319 1320 // Now we know that one of our operands is on the top of the stack, and at 1321 // least one of our operands is killed by this instruction. 1322 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1323 "Stack conditions not set up right!"); 1324 1325 // We decide which form to use based on what is on the top of the stack, and 1326 // which operand is killed by this instruction. 1327 ArrayRef<TableEntry> InstTable; 1328 bool isForward = TOS == Op0; 1329 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1330 if (updateST0) { 1331 if (isForward) 1332 InstTable = ForwardST0Table; 1333 else 1334 InstTable = ReverseST0Table; 1335 } else { 1336 if (isForward) 1337 InstTable = ForwardSTiTable; 1338 else 1339 InstTable = ReverseSTiTable; 1340 } 1341 1342 int Opcode = Lookup(InstTable, MI.getOpcode()); 1343 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1344 1345 // NotTOS - The register which is not on the top of stack... 1346 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1347 1348 // Replace the old instruction with a new instruction 1349 MBB->remove(&*I++); 1350 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1351 1352 // If both operands are killed, pop one off of the stack in addition to 1353 // overwriting the other one. 1354 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1355 assert(!updateST0 && "Should have updated other operand!"); 1356 popStackAfter(I); // Pop the top of stack 1357 } 1358 1359 // Update stack information so that we know the destination register is now on 1360 // the stack. 1361 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1362 assert(UpdatedSlot < StackTop && Dest < 7); 1363 Stack[UpdatedSlot] = Dest; 1364 RegMap[Dest] = UpdatedSlot; 1365 MBB->getParent()->DeleteMachineInstr(&MI); // Remove the old instruction 1366 } 1367 1368 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1369 /// register arguments and no explicit destinations. 1370 /// 1371 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1372 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1373 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1374 MachineInstr &MI = *I; 1375 1376 unsigned NumOperands = MI.getDesc().getNumOperands(); 1377 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1378 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2)); 1379 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1)); 1380 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0); 1381 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1); 1382 1383 // Make sure the first operand is on the top of stack, the other one can be 1384 // anywhere. 1385 moveToTop(Op0, I); 1386 1387 // Change from the pseudo instruction to the concrete instruction. 1388 MI.getOperand(0).setReg(getSTReg(Op1)); 1389 MI.RemoveOperand(1); 1390 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); 1391 1392 // If any of the operands are killed by this instruction, free them. 1393 if (KillsOp0) freeStackSlotAfter(I, Op0); 1394 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1395 } 1396 1397 /// handleCondMovFP - Handle two address conditional move instructions. These 1398 /// instructions move a st(i) register to st(0) iff a condition is true. These 1399 /// instructions require that the first operand is at the top of the stack, but 1400 /// otherwise don't modify the stack at all. 1401 void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 1402 MachineInstr &MI = *I; 1403 1404 unsigned Op0 = getFPReg(MI.getOperand(0)); 1405 unsigned Op1 = getFPReg(MI.getOperand(2)); 1406 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1); 1407 1408 // The first operand *must* be on the top of the stack. 1409 moveToTop(Op0, I); 1410 1411 // Change the second operand to the stack register that the operand is in. 1412 // Change from the pseudo instruction to the concrete instruction. 1413 MI.RemoveOperand(0); 1414 MI.RemoveOperand(1); 1415 MI.getOperand(0).setReg(getSTReg(Op1)); 1416 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode()))); 1417 1418 // If we kill the second operand, make sure to pop it from the stack. 1419 if (Op0 != Op1 && KillsOp1) { 1420 // Get this value off of the register stack. 1421 freeStackSlotAfter(I, Op1); 1422 } 1423 } 1424 1425 1426 /// handleSpecialFP - Handle special instructions which behave unlike other 1427 /// floating point instructions. This is primarily intended for use by pseudo 1428 /// instructions. 1429 /// 1430 void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { 1431 MachineInstr &MI = *Inst; 1432 1433 if (MI.isCall()) { 1434 handleCall(Inst); 1435 return; 1436 } 1437 1438 if (MI.isReturn()) { 1439 handleReturn(Inst); 1440 return; 1441 } 1442 1443 switch (MI.getOpcode()) { 1444 default: llvm_unreachable("Unknown SpecialFP instruction!"); 1445 case TargetOpcode::COPY: { 1446 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. 1447 const MachineOperand &MO1 = MI.getOperand(1); 1448 const MachineOperand &MO0 = MI.getOperand(0); 1449 bool KillsSrc = MI.killsRegister(MO1.getReg()); 1450 1451 // FP <- FP copy. 1452 unsigned DstFP = getFPReg(MO0); 1453 unsigned SrcFP = getFPReg(MO1); 1454 assert(isLive(SrcFP) && "Cannot copy dead register"); 1455 if (KillsSrc) { 1456 // If the input operand is killed, we can just change the owner of the 1457 // incoming stack slot into the result. 1458 unsigned Slot = getSlot(SrcFP); 1459 Stack[Slot] = DstFP; 1460 RegMap[DstFP] = Slot; 1461 } else { 1462 // For COPY we just duplicate the specified value to a new stack slot. 1463 // This could be made better, but would require substantial changes. 1464 duplicateToTop(SrcFP, DstFP, Inst); 1465 } 1466 break; 1467 } 1468 1469 case TargetOpcode::IMPLICIT_DEF: { 1470 // All FP registers must be explicitly defined, so load a 0 instead. 1471 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0; 1472 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n'); 1473 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0)); 1474 pushReg(Reg); 1475 break; 1476 } 1477 1478 case TargetOpcode::INLINEASM: { 1479 // The inline asm MachineInstr currently only *uses* FP registers for the 1480 // 'f' constraint. These should be turned into the current ST(x) register 1481 // in the machine instr. 1482 // 1483 // There are special rules for x87 inline assembly. The compiler must know 1484 // exactly how many registers are popped and pushed implicitly by the asm. 1485 // Otherwise it is not possible to restore the stack state after the inline 1486 // asm. 1487 // 1488 // There are 3 kinds of input operands: 1489 // 1490 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A 1491 // popped input operand must be in a fixed stack slot, and it is either 1492 // tied to an output operand, or in the clobber list. The MI has ST use 1493 // and def operands for these inputs. 1494 // 1495 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are 1496 // preserved by the inline asm. The fixed stack slots must be STn-STm 1497 // following the popped inputs. A fixed input operand cannot be tied to 1498 // an output or appear in the clobber list. The MI has ST use operands 1499 // and no defs for these inputs. 1500 // 1501 // 3. Preserved inputs. These inputs use the "f" constraint which is 1502 // represented as an FP register. The inline asm won't change these 1503 // stack slots. 1504 // 1505 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered 1506 // registers do not count as output operands. The inline asm changes the 1507 // stack as if it popped all the popped inputs and then pushed all the 1508 // output operands. 1509 1510 // Scan the assembly for ST registers used, defined and clobbered. We can 1511 // only tell clobbers from defs by looking at the asm descriptor. 1512 unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; 1513 unsigned NumOps = 0; 1514 SmallSet<unsigned, 1> FRegIdx; 1515 unsigned RCID; 1516 1517 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands(); 1518 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) { 1519 unsigned Flags = MI.getOperand(i).getImm(); 1520 1521 NumOps = InlineAsm::getNumOperandRegisters(Flags); 1522 if (NumOps != 1) 1523 continue; 1524 const MachineOperand &MO = MI.getOperand(i + 1); 1525 if (!MO.isReg()) 1526 continue; 1527 unsigned STReg = MO.getReg() - X86::FP0; 1528 if (STReg >= 8) 1529 continue; 1530 1531 // If the flag has a register class constraint, this must be an operand 1532 // with constraint "f". Record its index and continue. 1533 if (InlineAsm::hasRegClassConstraint(Flags, RCID)) { 1534 FRegIdx.insert(i + 1); 1535 continue; 1536 } 1537 1538 switch (InlineAsm::getKind(Flags)) { 1539 case InlineAsm::Kind_RegUse: 1540 STUses |= (1u << STReg); 1541 break; 1542 case InlineAsm::Kind_RegDef: 1543 case InlineAsm::Kind_RegDefEarlyClobber: 1544 STDefs |= (1u << STReg); 1545 if (MO.isDead()) 1546 STDeadDefs |= (1u << STReg); 1547 break; 1548 case InlineAsm::Kind_Clobber: 1549 STClobbers |= (1u << STReg); 1550 break; 1551 default: 1552 break; 1553 } 1554 } 1555 1556 if (STUses && !isMask_32(STUses)) 1557 MI.emitError("fixed input regs must be last on the x87 stack"); 1558 unsigned NumSTUses = countTrailingOnes(STUses); 1559 1560 // Defs must be contiguous from the stack top. ST0-STn. 1561 if (STDefs && !isMask_32(STDefs)) { 1562 MI.emitError("output regs must be last on the x87 stack"); 1563 STDefs = NextPowerOf2(STDefs) - 1; 1564 } 1565 unsigned NumSTDefs = countTrailingOnes(STDefs); 1566 1567 // So must the clobbered stack slots. ST0-STm, m >= n. 1568 if (STClobbers && !isMask_32(STDefs | STClobbers)) 1569 MI.emitError("clobbers must be last on the x87 stack"); 1570 1571 // Popped inputs are the ones that are also clobbered or defined. 1572 unsigned STPopped = STUses & (STDefs | STClobbers); 1573 if (STPopped && !isMask_32(STPopped)) 1574 MI.emitError("implicitly popped regs must be last on the x87 stack"); 1575 unsigned NumSTPopped = countTrailingOnes(STPopped); 1576 1577 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " 1578 << NumSTPopped << ", and defines " << NumSTDefs 1579 << " regs.\n"); 1580 1581 #ifndef NDEBUG 1582 // If any input operand uses constraint "f", all output register 1583 // constraints must be early-clobber defs. 1584 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) 1585 if (FRegIdx.count(I)) { 1586 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 && 1587 "Operands with constraint \"f\" cannot overlap with defs"); 1588 } 1589 #endif 1590 1591 // Collect all FP registers (register operands with constraints "t", "u", 1592 // and "f") to kill afer the instruction. 1593 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; 1594 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1595 MachineOperand &Op = MI.getOperand(i); 1596 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1597 continue; 1598 unsigned FPReg = getFPReg(Op); 1599 1600 // If we kill this operand, make sure to pop it from the stack after the 1601 // asm. We just remember it for now, and pop them all off at the end in 1602 // a batch. 1603 if (Op.isUse() && Op.isKill()) 1604 FPKills |= 1U << FPReg; 1605 } 1606 1607 // Do not include registers that are implicitly popped by defs/clobbers. 1608 FPKills &= ~(STDefs | STClobbers); 1609 1610 // Now we can rearrange the live registers to match what was requested. 1611 unsigned char STUsesArray[8]; 1612 1613 for (unsigned I = 0; I < NumSTUses; ++I) 1614 STUsesArray[I] = I; 1615 1616 shuffleStackTop(STUsesArray, NumSTUses, Inst); 1617 LLVM_DEBUG({ 1618 dbgs() << "Before asm: "; 1619 dumpStack(); 1620 }); 1621 1622 // With the stack layout fixed, rewrite the FP registers. 1623 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1624 MachineOperand &Op = MI.getOperand(i); 1625 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1626 continue; 1627 1628 unsigned FPReg = getFPReg(Op); 1629 1630 if (FRegIdx.count(i)) 1631 // Operand with constraint "f". 1632 Op.setReg(getSTReg(FPReg)); 1633 else 1634 // Operand with a single register class constraint ("t" or "u"). 1635 Op.setReg(X86::ST0 + FPReg); 1636 } 1637 1638 // Simulate the inline asm popping its inputs and pushing its outputs. 1639 StackTop -= NumSTPopped; 1640 1641 for (unsigned i = 0; i < NumSTDefs; ++i) 1642 pushReg(NumSTDefs - i - 1); 1643 1644 // If this asm kills any FP registers (is the last use of them) we must 1645 // explicitly emit pop instructions for them. Do this now after the asm has 1646 // executed so that the ST(x) numbers are not off (which would happen if we 1647 // did this inline with operand rewriting). 1648 // 1649 // Note: this might be a non-optimal pop sequence. We might be able to do 1650 // better by trying to pop in stack order or something. 1651 while (FPKills) { 1652 unsigned FPReg = countTrailingZeros(FPKills); 1653 if (isLive(FPReg)) 1654 freeStackSlotAfter(Inst, FPReg); 1655 FPKills &= ~(1U << FPReg); 1656 } 1657 1658 // Don't delete the inline asm! 1659 return; 1660 } 1661 } 1662 1663 Inst = MBB->erase(Inst); // Remove the pseudo instruction 1664 1665 // We want to leave I pointing to the previous instruction, but what if we 1666 // just erased the first instruction? 1667 if (Inst == MBB->begin()) { 1668 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n"); 1669 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL)); 1670 } else 1671 --Inst; 1672 } 1673 1674 void FPS::setKillFlags(MachineBasicBlock &MBB) const { 1675 const TargetRegisterInfo &TRI = 1676 *MBB.getParent()->getSubtarget().getRegisterInfo(); 1677 LivePhysRegs LPR(TRI); 1678 1679 LPR.addLiveOuts(MBB); 1680 1681 for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); 1682 I != E; ++I) { 1683 if (I->isDebugInstr()) 1684 continue; 1685 1686 std::bitset<8> Defs; 1687 SmallVector<MachineOperand *, 2> Uses; 1688 MachineInstr &MI = *I; 1689 1690 for (auto &MO : I->operands()) { 1691 if (!MO.isReg()) 1692 continue; 1693 1694 unsigned Reg = MO.getReg() - X86::FP0; 1695 1696 if (Reg >= 8) 1697 continue; 1698 1699 if (MO.isDef()) { 1700 Defs.set(Reg); 1701 if (!LPR.contains(MO.getReg())) 1702 MO.setIsDead(); 1703 } else 1704 Uses.push_back(&MO); 1705 } 1706 1707 for (auto *MO : Uses) 1708 if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg())) 1709 MO->setIsKill(); 1710 1711 LPR.stepBackward(MI); 1712 } 1713 } 1714