//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "RegisterPressure.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
    IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
    LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
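    // As an illustration (hypothetical IR, value names made up), the kind of
    // sequence this lets us look through is:
    //   %pi = ptrtoint i8* %obj to i64
    //   %ai = add i64 %pi, 16
    //   %p  = inttoptr i64 %ai to i8*
    // getUnderlyingObjectFromInt walks from the add back to the ptrtoint,
    // so %obj is reported as the underlying object of %p.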
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = 0;
}

/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = forceUnitLatencies();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    assert(Uses.empty() && "Uses in set before adding deps?");
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies requires LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);

  // Special case: An early-clobber tied operand reads and writes the
  // register one slot early. e.g. InlineAsm.
  //
  // FIXME: Same special case is in shrinkToUses. Hide under an API.
  if (SlotIndex::isSameInstr(VNI->def, UseIdx)) {
    UseIdx = VNI->def;
    VNI = LI->getVNInfoBefore(UseIdx);
  }
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      computeLatency(SU);
  }
}

/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }
    if (RPTracker) {
      RPTracker->recede();
      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
    }

    assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}

void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // UseMI is null, so it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}