//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
    IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
    FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
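
// For example (an illustrative sketch, not an exhaustive description), an
// access whose address is built as
//   %i = ptrtoint i8* %p to i64
//   %j = add i64 %i, 16
//   %q = inttoptr i64 %j to i8*
// should be traced back to the same underlying object as %p.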

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call GetUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

void ScheduleDAGInstrs::finishBlock() {
  // Nothing to do.
}

/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  BB = bb;
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = forceUnitLatencies();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}
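
// A minimal sketch of how a scheduling driver is expected to use this
// interface (illustrative only; the exact hooks, variable names, and
// arguments depend on the concrete scheduler):
//
//   DAG->startBlock(MBB);
//   DAG->enterRegion(MBB, RegionBegin, RegionEnd, EndCount);
//   DAG->buildSchedGraph(AA);
//   // ... pick an order and move instructions ...
//   DAG->exitRegion();
//   DAG->finishBlock();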

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}
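
// Illustrative example (the mnemonic and register are hypothetical): if a
// region ends in a call such as
//   BL <target>, implicit %R0<use>
// then ExitSU records a use of R0, and the instruction in the region that
// defines R0 later receives a data edge to ExitSU. If the region instead
// falls through, every live-in of the successor blocks is treated as if it
// were used by ExitSU.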

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of SpecialAddressLatency and just
      // move this into adjustSchedDependency for the targets that care
      // about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}
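
// Illustrative note on register overlap (using X86 names purely as an
// example): TRI->getOverlaps() enumerates a register together with every
// register that aliases it, so a def of EAX also creates edges against any
// recorded uses or defs of AX, AH, and AL, since writing EAX clobbers them.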

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  // there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            // isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
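
// A small illustrative case (the vreg and opcodes are hypothetical): with
// two defs of the same virtual register in one region,
//   %vreg1<def> = A ...   ; earlier in program order
//   %vreg1<def> = B ...   ; later in program order
// the bottom-up walk visits B first and records it in VRegDefs; when A is
// visited, A's SUnit is added as an Output-dependence predecessor of B's,
// so B cannot be scheduled above A.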

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      computeLatency(SU);
  }
}
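
// Informal overview of the memory-dependence bookkeeping used below (a
// summary, not an exhaustive specification): BarrierChain points at the most
// recent instruction treated as a full barrier for memory ordering;
// AliasChain points at the most recent node that all may-aliasing references
// must be chained through; PendingLoads collects loads with no identifiable
// underlying object; AliasMemDefs/AliasMemUses and
// NonAliasMemDefs/NonAliasMemUses track stores and loads keyed by their
// underlying Value so they can be given more precise edges.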

void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
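        // (Stores reaching this point have no single identifiable underlying
        // object, e.g. a store through an arbitrary pointer or one with
        // multiple memory operands, so they are chained through the
        // conservative may-alias path at the new_alias_chain label above.)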
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise
          // dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}

void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// may contain multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}