1 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file provides an interface for customizing the standard MachineScheduler 11 // pass. Note that the entire pass may be replaced as follows: 12 // 13 // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) { 14 // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID); 15 // ...} 16 // 17 // The MachineScheduler pass is only responsible for choosing the regions to be 18 // scheduled. Targets can override the DAG builder and scheduler without 19 // replacing the pass as follows: 20 // 21 // ScheduleDAGInstrs *<Target>PassConfig:: 22 // createMachineScheduler(MachineSchedContext *C) { 23 // return new CustomMachineScheduler(C); 24 // } 25 // 26 // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list 27 // scheduling while updating the instruction stream, register pressure, and live 28 // intervals. Most targets don't need to override the DAG builder and list 29 // scheduler, but subtargets that require custom scheduling heuristics may 30 // plugin an alternate MachineSchedStrategy. The strategy is responsible for 31 // selecting the highest priority node from the list: 32 // 33 // ScheduleDAGInstrs *<Target>PassConfig:: 34 // createMachineScheduler(MachineSchedContext *C) { 35 // return new ScheduleDAGMILive(C, CustomStrategy(C)); 36 // } 37 // 38 // The DAG builder can also be customized in a sense by adding DAG mutations 39 // that will run after DAG building and before list scheduling. DAG mutations 40 // can adjust dependencies based on target-specific knowledge or add weak edges 41 // to aid heuristics: 42 // 43 // ScheduleDAGInstrs *<Target>PassConfig:: 44 // createMachineScheduler(MachineSchedContext *C) { 45 // ScheduleDAGMI *DAG = createGenericSchedLive(C); 46 // DAG->addMutation(new CustomDAGMutation(...)); 47 // return DAG; 48 // } 49 // 50 // A target that supports alternative schedulers can use the 51 // MachineSchedRegistry to allow command line selection. This can be done by 52 // implementing the following boilerplate: 53 // 54 // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { 55 // return new CustomMachineScheduler(C); 56 // } 57 // static MachineSchedRegistry 58 // SchedCustomRegistry("custom", "Run my target's custom scheduler", 59 // createCustomMachineSched); 60 // 61 // 62 // Finally, subtargets that don't need to implement custom heuristics but would 63 // like to configure the GenericScheduler's policy for a given scheduler region, 64 // including scheduling direction and register pressure tracking policy, can do 65 // this: 66 // 67 // void <SubTarget>Subtarget:: 68 // overrideSchedPolicy(MachineSchedPolicy &Policy, 69 // unsigned NumRegionInstrs) const { 70 // Policy.<Flag> = true; 71 // } 72 // 73 //===----------------------------------------------------------------------===// 74 75 #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H 76 #define LLVM_CODEGEN_MACHINESCHEDULER_H 77 78 #include "llvm/ADT/ArrayRef.h" 79 #include "llvm/ADT/BitVector.h" 80 #include "llvm/ADT/STLExtras.h" 81 #include "llvm/ADT/SmallVector.h" 82 #include "llvm/ADT/StringRef.h" 83 #include "llvm/ADT/Twine.h" 84 #include "llvm/Analysis/AliasAnalysis.h" 85 #include "llvm/CodeGen/MachineBasicBlock.h" 86 #include "llvm/CodeGen/MachinePassRegistry.h" 87 #include "llvm/CodeGen/RegisterPressure.h" 88 #include "llvm/CodeGen/ScheduleDAG.h" 89 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 90 #include "llvm/CodeGen/ScheduleDAGMutation.h" 91 #include "llvm/CodeGen/TargetSchedule.h" 92 #include "llvm/Support/CommandLine.h" 93 #include "llvm/Support/ErrorHandling.h" 94 #include <algorithm> 95 #include <cassert> 96 #include <memory> 97 #include <string> 98 #include <vector> 99 100 namespace llvm { 101 102 extern cl::opt<bool> ForceTopDown; 103 extern cl::opt<bool> ForceBottomUp; 104 105 class LiveIntervals; 106 class MachineDominatorTree; 107 class MachineFunction; 108 class MachineInstr; 109 class MachineLoopInfo; 110 class RegisterClassInfo; 111 class SchedDFSResult; 112 class ScheduleHazardRecognizer; 113 class TargetInstrInfo; 114 class TargetPassConfig; 115 class TargetRegisterInfo; 116 117 /// MachineSchedContext provides enough context from the MachineScheduler pass 118 /// for the target to instantiate a scheduler. 119 struct MachineSchedContext { 120 MachineFunction *MF = nullptr; 121 const MachineLoopInfo *MLI = nullptr; 122 const MachineDominatorTree *MDT = nullptr; 123 const TargetPassConfig *PassConfig = nullptr; 124 AliasAnalysis *AA = nullptr; 125 LiveIntervals *LIS = nullptr; 126 127 RegisterClassInfo *RegClassInfo; 128 129 MachineSchedContext(); 130 virtual ~MachineSchedContext(); 131 }; 132 133 /// MachineSchedRegistry provides a selection of available machine instruction 134 /// schedulers. 135 class MachineSchedRegistry 136 : public MachinePassRegistryNode< 137 ScheduleDAGInstrs *(*)(MachineSchedContext *)> { 138 public: 139 using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); 140 141 // RegisterPassParser requires a (misnamed) FunctionPassCtor type. 142 using FunctionPassCtor = ScheduleDAGCtor; 143 144 static MachinePassRegistry<ScheduleDAGCtor> Registry; 145 MachineSchedRegistry(const char * N,const char * D,ScheduleDAGCtor C)146 MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) 147 : MachinePassRegistryNode(N, D, C) { 148 Registry.Add(this); 149 } 150 ~MachineSchedRegistry()151 ~MachineSchedRegistry() { Registry.Remove(this); } 152 153 // Accessors. 154 // getNext()155 MachineSchedRegistry *getNext() const { 156 return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); 157 } 158 getList()159 static MachineSchedRegistry *getList() { 160 return (MachineSchedRegistry *)Registry.getList(); 161 } 162 setListener(MachinePassRegistryListener<FunctionPassCtor> * L)163 static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) { 164 Registry.setListener(L); 165 } 166 }; 167 168 class ScheduleDAGMI; 169 170 /// Define a generic scheduling policy for targets that don't provide their own 171 /// MachineSchedStrategy. This can be overriden for each scheduling region 172 /// before building the DAG. 173 struct MachineSchedPolicy { 174 // Allow the scheduler to disable register pressure tracking. 175 bool ShouldTrackPressure = false; 176 /// Track LaneMasks to allow reordering of independent subregister writes 177 /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() 178 bool ShouldTrackLaneMasks = false; 179 180 // Allow the scheduler to force top-down or bottom-up scheduling. If neither 181 // is true, the scheduler runs in both directions and converges. 182 bool OnlyTopDown = false; 183 bool OnlyBottomUp = false; 184 185 // Disable heuristic that tries to fetch nodes from long dependency chains 186 // first. 187 bool DisableLatencyHeuristic = false; 188 189 MachineSchedPolicy() = default; 190 }; 191 192 /// MachineSchedStrategy - Interface to the scheduling algorithm used by 193 /// ScheduleDAGMI. 194 /// 195 /// Initialization sequence: 196 /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots 197 class MachineSchedStrategy { 198 virtual void anchor(); 199 200 public: 201 virtual ~MachineSchedStrategy() = default; 202 203 /// Optionally override the per-region scheduling policy. initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)204 virtual void initPolicy(MachineBasicBlock::iterator Begin, 205 MachineBasicBlock::iterator End, 206 unsigned NumRegionInstrs) {} 207 dumpPolicy()208 virtual void dumpPolicy() const {} 209 210 /// Check if pressure tracking is needed before building the DAG and 211 /// initializing this strategy. Called after initPolicy. shouldTrackPressure()212 virtual bool shouldTrackPressure() const { return true; } 213 214 /// Returns true if lanemasks should be tracked. LaneMask tracking is 215 /// necessary to reorder independent subregister defs for the same vreg. 216 /// This has to be enabled in combination with shouldTrackPressure(). shouldTrackLaneMasks()217 virtual bool shouldTrackLaneMasks() const { return false; } 218 219 // If this method returns true, handling of the scheduling regions 220 // themselves (in case of a scheduling boundary in MBB) will be done 221 // beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()222 virtual bool doMBBSchedRegionsTopDown() const { return false; } 223 224 /// Initialize the strategy after building the DAG for a new region. 225 virtual void initialize(ScheduleDAGMI *DAG) = 0; 226 227 /// Tell the strategy that MBB is about to be processed. enterMBB(MachineBasicBlock * MBB)228 virtual void enterMBB(MachineBasicBlock *MBB) {}; 229 230 /// Tell the strategy that current MBB is done. leaveMBB()231 virtual void leaveMBB() {}; 232 233 /// Notify this strategy that all roots have been released (including those 234 /// that depend on EntrySU or ExitSU). registerRoots()235 virtual void registerRoots() {} 236 237 /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to 238 /// schedule the node at the top of the unscheduled region. Otherwise it will 239 /// be scheduled at the bottom. 240 virtual SUnit *pickNode(bool &IsTopNode) = 0; 241 242 /// Scheduler callback to notify that a new subtree is scheduled. scheduleTree(unsigned SubtreeID)243 virtual void scheduleTree(unsigned SubtreeID) {} 244 245 /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an 246 /// instruction and updated scheduled/remaining flags in the DAG nodes. 247 virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; 248 249 /// When all predecessor dependencies have been resolved, free this node for 250 /// top-down scheduling. 251 virtual void releaseTopNode(SUnit *SU) = 0; 252 253 /// When all successor dependencies have been resolved, free this node for 254 /// bottom-up scheduling. 255 virtual void releaseBottomNode(SUnit *SU) = 0; 256 }; 257 258 /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply 259 /// schedules machine instructions according to the given MachineSchedStrategy 260 /// without much extra book-keeping. This is the common functionality between 261 /// PreRA and PostRA MachineScheduler. 262 class ScheduleDAGMI : public ScheduleDAGInstrs { 263 protected: 264 AliasAnalysis *AA; 265 LiveIntervals *LIS; 266 std::unique_ptr<MachineSchedStrategy> SchedImpl; 267 268 /// Topo - A topological ordering for SUnits which permits fast IsReachable 269 /// and similar queries. 270 ScheduleDAGTopologicalSort Topo; 271 272 /// Ordered list of DAG postprocessing steps. 273 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 274 275 /// The top of the unscheduled zone. 276 MachineBasicBlock::iterator CurrentTop; 277 278 /// The bottom of the unscheduled zone. 279 MachineBasicBlock::iterator CurrentBottom; 280 281 /// Record the next node in a scheduled cluster. 282 const SUnit *NextClusterPred = nullptr; 283 const SUnit *NextClusterSucc = nullptr; 284 285 #ifndef NDEBUG 286 /// The number of instructions scheduled so far. Used to cut off the 287 /// scheduler at the point determined by misched-cutoff. 288 unsigned NumInstrsScheduled = 0; 289 #endif 290 291 public: ScheduleDAGMI(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S,bool RemoveKillFlags)292 ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, 293 bool RemoveKillFlags) 294 : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), 295 LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU) {} 296 297 // Provide a vtable anchor 298 ~ScheduleDAGMI() override; 299 300 /// If this method returns true, handling of the scheduling regions 301 /// themselves (in case of a scheduling boundary in MBB) will be done 302 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()303 bool doMBBSchedRegionsTopDown() const override { 304 return SchedImpl->doMBBSchedRegionsTopDown(); 305 } 306 307 // Returns LiveIntervals instance for use in DAG mutators and such. getLIS()308 LiveIntervals *getLIS() const { return LIS; } 309 310 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()311 virtual bool hasVRegLiveness() const { return false; } 312 313 /// Add a postprocessing step to the DAG builder. 314 /// Mutations are applied in the order that they are added after normal DAG 315 /// building and before MachineSchedStrategy initialization. 316 /// 317 /// ScheduleDAGMI takes ownership of the Mutation object. addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)318 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 319 if (Mutation) 320 Mutations.push_back(std::move(Mutation)); 321 } 322 323 /// True if an edge can be added from PredSU to SuccSU without creating 324 /// a cycle. 325 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 326 327 /// Add a DAG edge to the given SU with the given predecessor 328 /// dependence data. 329 /// 330 /// \returns true if the edge may be added without creating a cycle OR if an 331 /// equivalent edge already existed (false indicates failure). 332 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 333 top()334 MachineBasicBlock::iterator top() const { return CurrentTop; } bottom()335 MachineBasicBlock::iterator bottom() const { return CurrentBottom; } 336 337 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 338 /// region. This covers all instructions in a block, while schedule() may only 339 /// cover a subset. 340 void enterRegion(MachineBasicBlock *bb, 341 MachineBasicBlock::iterator begin, 342 MachineBasicBlock::iterator end, 343 unsigned regioninstrs) override; 344 345 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 346 /// reorderable instructions. 347 void schedule() override; 348 349 void startBlock(MachineBasicBlock *bb) override; 350 void finishBlock() override; 351 352 /// Change the position of an instruction within the basic block and update 353 /// live ranges and region boundary iterators. 354 void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); 355 getNextClusterPred()356 const SUnit *getNextClusterPred() const { return NextClusterPred; } 357 getNextClusterSucc()358 const SUnit *getNextClusterSucc() const { return NextClusterSucc; } 359 360 void viewGraph(const Twine &Name, const Twine &Title) override; 361 void viewGraph() override; 362 363 protected: 364 // Top-Level entry points for the schedule() driver... 365 366 /// Apply each ScheduleDAGMutation step in order. This allows different 367 /// instances of ScheduleDAGMI to perform custom DAG postprocessing. 368 void postprocessDAG(); 369 370 /// Release ExitSU predecessors and setup scheduler queues. 371 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 372 373 /// Update scheduler DAG and queues after scheduling an instruction. 374 void updateQueues(SUnit *SU, bool IsTopNode); 375 376 /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. 377 void placeDebugValues(); 378 379 /// dump the scheduled Sequence. 380 void dumpSchedule() const; 381 382 // Lesser helpers... 383 bool checkSchedLimit(); 384 385 void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 386 SmallVectorImpl<SUnit*> &BotRoots); 387 388 void releaseSucc(SUnit *SU, SDep *SuccEdge); 389 void releaseSuccessors(SUnit *SU); 390 void releasePred(SUnit *SU, SDep *PredEdge); 391 void releasePredecessors(SUnit *SU); 392 }; 393 394 /// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules 395 /// machine instructions while updating LiveIntervals and tracking regpressure. 396 class ScheduleDAGMILive : public ScheduleDAGMI { 397 protected: 398 RegisterClassInfo *RegClassInfo; 399 400 /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees 401 /// will be empty. 402 SchedDFSResult *DFSResult = nullptr; 403 BitVector ScheduledTrees; 404 405 MachineBasicBlock::iterator LiveRegionEnd; 406 407 /// Maps vregs to the SUnits of their uses in the current scheduling region. 408 VReg2SUnitMultiMap VRegUses; 409 410 // Map each SU to its summary of pressure changes. This array is updated for 411 // liveness during bottom-up scheduling. Top-down scheduling may proceed but 412 // has no affect on the pressure diffs. 413 PressureDiffs SUPressureDiffs; 414 415 /// Register pressure in this region computed by initRegPressure. 416 bool ShouldTrackPressure = false; 417 bool ShouldTrackLaneMasks = false; 418 IntervalPressure RegPressure; 419 RegPressureTracker RPTracker; 420 421 /// List of pressure sets that exceed the target's pressure limit before 422 /// scheduling, listed in increasing set ID order. Each pressure set is paired 423 /// with its max pressure in the currently scheduled regions. 424 std::vector<PressureChange> RegionCriticalPSets; 425 426 /// The top of the unscheduled zone. 427 IntervalPressure TopPressure; 428 RegPressureTracker TopRPTracker; 429 430 /// The bottom of the unscheduled zone. 431 IntervalPressure BotPressure; 432 RegPressureTracker BotRPTracker; 433 434 /// True if disconnected subregister components are already renamed. 435 /// The renaming is only done on demand if lane masks are tracked. 436 bool DisconnectedComponentsRenamed = false; 437 438 public: ScheduleDAGMILive(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S)439 ScheduleDAGMILive(MachineSchedContext *C, 440 std::unique_ptr<MachineSchedStrategy> S) 441 : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), 442 RegClassInfo(C->RegClassInfo), RPTracker(RegPressure), 443 TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} 444 445 ~ScheduleDAGMILive() override; 446 447 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()448 bool hasVRegLiveness() const override { return true; } 449 450 /// Return true if register pressure tracking is enabled. isTrackingPressure()451 bool isTrackingPressure() const { return ShouldTrackPressure; } 452 453 /// Get current register pressure for the top scheduled instructions. getTopPressure()454 const IntervalPressure &getTopPressure() const { return TopPressure; } getTopRPTracker()455 const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } 456 457 /// Get current register pressure for the bottom scheduled instructions. getBotPressure()458 const IntervalPressure &getBotPressure() const { return BotPressure; } getBotRPTracker()459 const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } 460 461 /// Get register pressure for the entire scheduling region before scheduling. getRegPressure()462 const IntervalPressure &getRegPressure() const { return RegPressure; } 463 getRegionCriticalPSets()464 const std::vector<PressureChange> &getRegionCriticalPSets() const { 465 return RegionCriticalPSets; 466 } 467 getPressureDiff(const SUnit * SU)468 PressureDiff &getPressureDiff(const SUnit *SU) { 469 return SUPressureDiffs[SU->NodeNum]; 470 } getPressureDiff(const SUnit * SU)471 const PressureDiff &getPressureDiff(const SUnit *SU) const { 472 return SUPressureDiffs[SU->NodeNum]; 473 } 474 475 /// Compute a DFSResult after DAG building is complete, and before any 476 /// queue comparisons. 477 void computeDFSResult(); 478 479 /// Return a non-null DFS result if the scheduling strategy initialized it. getDFSResult()480 const SchedDFSResult *getDFSResult() const { return DFSResult; } 481 getScheduledTrees()482 BitVector &getScheduledTrees() { return ScheduledTrees; } 483 484 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 485 /// region. This covers all instructions in a block, while schedule() may only 486 /// cover a subset. 487 void enterRegion(MachineBasicBlock *bb, 488 MachineBasicBlock::iterator begin, 489 MachineBasicBlock::iterator end, 490 unsigned regioninstrs) override; 491 492 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 493 /// reorderable instructions. 494 void schedule() override; 495 496 /// Compute the cyclic critical path through the DAG. 497 unsigned computeCyclicCriticalPath(); 498 499 void dump() const override; 500 501 protected: 502 // Top-Level entry points for the schedule() driver... 503 504 /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking 505 /// enabled. This sets up three trackers. RPTracker will cover the entire DAG 506 /// region, TopTracker and BottomTracker will be initialized to the top and 507 /// bottom of the DAG region without covereing any unscheduled instruction. 508 void buildDAGWithRegPressure(); 509 510 /// Release ExitSU predecessors and setup scheduler queues. Re-position 511 /// the Top RP tracker in case the region beginning has changed. 512 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 513 514 /// Move an instruction and update register pressure. 515 void scheduleMI(SUnit *SU, bool IsTopNode); 516 517 // Lesser helpers... 518 519 void initRegPressure(); 520 521 void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses); 522 523 void updateScheduledPressure(const SUnit *SU, 524 const std::vector<unsigned> &NewMaxPressure); 525 526 void collectVRegUses(SUnit &SU); 527 }; 528 529 //===----------------------------------------------------------------------===// 530 /// 531 /// Helpers for implementing custom MachineSchedStrategy classes. These take 532 /// care of the book-keeping associated with list scheduling heuristics. 533 /// 534 //===----------------------------------------------------------------------===// 535 536 /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience 537 /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified 538 /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. 539 /// 540 /// This is a convenience class that may be used by implementations of 541 /// MachineSchedStrategy. 542 class ReadyQueue { 543 unsigned ID; 544 std::string Name; 545 std::vector<SUnit*> Queue; 546 547 public: ReadyQueue(unsigned id,const Twine & name)548 ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} 549 getID()550 unsigned getID() const { return ID; } 551 getName()552 StringRef getName() const { return Name; } 553 554 // SU is in this queue if it's NodeQueueID is a superset of this ID. isInQueue(SUnit * SU)555 bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } 556 empty()557 bool empty() const { return Queue.empty(); } 558 clear()559 void clear() { Queue.clear(); } 560 size()561 unsigned size() const { return Queue.size(); } 562 563 using iterator = std::vector<SUnit*>::iterator; 564 begin()565 iterator begin() { return Queue.begin(); } 566 end()567 iterator end() { return Queue.end(); } 568 elements()569 ArrayRef<SUnit*> elements() { return Queue; } 570 find(SUnit * SU)571 iterator find(SUnit *SU) { return llvm::find(Queue, SU); } 572 push(SUnit * SU)573 void push(SUnit *SU) { 574 Queue.push_back(SU); 575 SU->NodeQueueId |= ID; 576 } 577 remove(iterator I)578 iterator remove(iterator I) { 579 (*I)->NodeQueueId &= ~ID; 580 *I = Queue.back(); 581 unsigned idx = I - Queue.begin(); 582 Queue.pop_back(); 583 return Queue.begin() + idx; 584 } 585 586 void dump() const; 587 }; 588 589 /// Summarize the unscheduled region. 590 struct SchedRemainder { 591 // Critical path through the DAG in expected latency. 592 unsigned CriticalPath; 593 unsigned CyclicCritPath; 594 595 // Scaled count of micro-ops left to schedule. 596 unsigned RemIssueCount; 597 598 bool IsAcyclicLatencyLimited; 599 600 // Unscheduled resources 601 SmallVector<unsigned, 16> RemainingCounts; 602 SchedRemainderSchedRemainder603 SchedRemainder() { reset(); } 604 resetSchedRemainder605 void reset() { 606 CriticalPath = 0; 607 CyclicCritPath = 0; 608 RemIssueCount = 0; 609 IsAcyclicLatencyLimited = false; 610 RemainingCounts.clear(); 611 } 612 613 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 614 }; 615 616 /// Each Scheduling boundary is associated with ready queues. It tracks the 617 /// current cycle in the direction of movement, and maintains the state 618 /// of "hazards" and other interlocks at the current cycle. 619 class SchedBoundary { 620 public: 621 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 622 enum { 623 TopQID = 1, 624 BotQID = 2, 625 LogMaxQID = 2 626 }; 627 628 ScheduleDAGMI *DAG = nullptr; 629 const TargetSchedModel *SchedModel = nullptr; 630 SchedRemainder *Rem = nullptr; 631 632 ReadyQueue Available; 633 ReadyQueue Pending; 634 635 ScheduleHazardRecognizer *HazardRec = nullptr; 636 637 private: 638 /// True if the pending Q should be checked/updated before scheduling another 639 /// instruction. 640 bool CheckPending; 641 642 /// Number of cycles it takes to issue the instructions scheduled in this 643 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. 644 /// See getStalls(). 645 unsigned CurrCycle; 646 647 /// Micro-ops issued in the current cycle 648 unsigned CurrMOps; 649 650 /// MinReadyCycle - Cycle of the soonest available instruction. 651 unsigned MinReadyCycle; 652 653 // The expected latency of the critical path in this scheduled zone. 654 unsigned ExpectedLatency; 655 656 // The latency of dependence chains leading into this zone. 657 // For each node scheduled bottom-up: DLat = max DLat, N.Depth. 658 // For each cycle scheduled: DLat -= 1. 659 unsigned DependentLatency; 660 661 /// Count the scheduled (issued) micro-ops that can be retired by 662 /// time=CurrCycle assuming the first scheduled instr is retired at time=0. 663 unsigned RetiredMOps; 664 665 // Count scheduled resources that have been executed. Resources are 666 // considered executed if they become ready in the time that it takes to 667 // saturate any resource including the one in question. Counts are scaled 668 // for direct comparison with other resources. Counts can be compared with 669 // MOps * getMicroOpFactor and Latency * getLatencyFactor. 670 SmallVector<unsigned, 16> ExecutedResCounts; 671 672 /// Cache the max count for a single resource. 673 unsigned MaxExecutedResCount; 674 675 // Cache the critical resources ID in this scheduled zone. 676 unsigned ZoneCritResIdx; 677 678 // Is the scheduled region resource limited vs. latency limited. 679 bool IsResourceLimited; 680 681 // Record the highest cycle at which each resource has been reserved by a 682 // scheduled instruction. 683 SmallVector<unsigned, 16> ReservedCycles; 684 685 #ifndef NDEBUG 686 // Remember the greatest possible stall as an upper bound on the number of 687 // times we should retry the pending queue because of a hazard. 688 unsigned MaxObservedStall; 689 #endif 690 691 public: 692 /// Pending queues extend the ready queues with the same ID and the 693 /// PendingFlag set. SchedBoundary(unsigned ID,const Twine & Name)694 SchedBoundary(unsigned ID, const Twine &Name): 695 Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") { 696 reset(); 697 } 698 699 ~SchedBoundary(); 700 701 void reset(); 702 703 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 704 SchedRemainder *rem); 705 isTop()706 bool isTop() const { 707 return Available.getID() == TopQID; 708 } 709 710 /// Number of cycles to issue the instructions scheduled in this zone. getCurrCycle()711 unsigned getCurrCycle() const { return CurrCycle; } 712 713 /// Micro-ops issued in the current cycle getCurrMOps()714 unsigned getCurrMOps() const { return CurrMOps; } 715 716 // The latency of dependence chains leading into this zone. getDependentLatency()717 unsigned getDependentLatency() const { return DependentLatency; } 718 719 /// Get the number of latency cycles "covered" by the scheduled 720 /// instructions. This is the larger of the critical path within the zone 721 /// and the number of cycles required to issue the instructions. getScheduledLatency()722 unsigned getScheduledLatency() const { 723 return std::max(ExpectedLatency, CurrCycle); 724 } 725 getUnscheduledLatency(SUnit * SU)726 unsigned getUnscheduledLatency(SUnit *SU) const { 727 return isTop() ? SU->getHeight() : SU->getDepth(); 728 } 729 getResourceCount(unsigned ResIdx)730 unsigned getResourceCount(unsigned ResIdx) const { 731 return ExecutedResCounts[ResIdx]; 732 } 733 734 /// Get the scaled count of scheduled micro-ops and resources, including 735 /// executed resources. getCriticalCount()736 unsigned getCriticalCount() const { 737 if (!ZoneCritResIdx) 738 return RetiredMOps * SchedModel->getMicroOpFactor(); 739 return getResourceCount(ZoneCritResIdx); 740 } 741 742 /// Get a scaled count for the minimum execution time of the scheduled 743 /// micro-ops that are ready to execute by getExecutedCount. Notice the 744 /// feedback loop. getExecutedCount()745 unsigned getExecutedCount() const { 746 return std::max(CurrCycle * SchedModel->getLatencyFactor(), 747 MaxExecutedResCount); 748 } 749 getZoneCritResIdx()750 unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } 751 752 // Is the scheduled region resource limited vs. latency limited. isResourceLimited()753 bool isResourceLimited() const { return IsResourceLimited; } 754 755 /// Get the difference between the given SUnit's ready time and the current 756 /// cycle. 757 unsigned getLatencyStallCycles(SUnit *SU); 758 759 unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles); 760 761 bool checkHazard(SUnit *SU); 762 763 unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); 764 765 unsigned getOtherResourceCount(unsigned &OtherCritIdx); 766 767 void releaseNode(SUnit *SU, unsigned ReadyCycle); 768 769 void bumpCycle(unsigned NextCycle); 770 771 void incExecutedResources(unsigned PIdx, unsigned Count); 772 773 unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); 774 775 void bumpNode(SUnit *SU); 776 777 void releasePending(); 778 779 void removeReady(SUnit *SU); 780 781 /// Call this before applying any other heuristics to the Available queue. 782 /// Updates the Available/Pending Q's if necessary and returns the single 783 /// available instruction, or NULL if there are multiple candidates. 784 SUnit *pickOnlyChoice(); 785 786 void dumpScheduledState() const; 787 }; 788 789 /// Base class for GenericScheduler. This class maintains information about 790 /// scheduling candidates based on TargetSchedModel making it easy to implement 791 /// heuristics for either preRA or postRA scheduling. 792 class GenericSchedulerBase : public MachineSchedStrategy { 793 public: 794 /// Represent the type of SchedCandidate found within a single queue. 795 /// pickNodeBidirectional depends on these listed by decreasing priority. 796 enum CandReason : uint8_t { 797 NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, 798 RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 799 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; 800 801 #ifndef NDEBUG 802 static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); 803 #endif 804 805 /// Policy for scheduling the next instruction in the candidate's zone. 806 struct CandPolicy { 807 bool ReduceLatency = false; 808 unsigned ReduceResIdx = 0; 809 unsigned DemandResIdx = 0; 810 811 CandPolicy() = default; 812 813 bool operator==(const CandPolicy &RHS) const { 814 return ReduceLatency == RHS.ReduceLatency && 815 ReduceResIdx == RHS.ReduceResIdx && 816 DemandResIdx == RHS.DemandResIdx; 817 } 818 bool operator!=(const CandPolicy &RHS) const { 819 return !(*this == RHS); 820 } 821 }; 822 823 /// Status of an instruction's critical resource consumption. 824 struct SchedResourceDelta { 825 // Count critical resources in the scheduled region required by SU. 826 unsigned CritResources = 0; 827 828 // Count critical resources from another region consumed by SU. 829 unsigned DemandedResources = 0; 830 831 SchedResourceDelta() = default; 832 833 bool operator==(const SchedResourceDelta &RHS) const { 834 return CritResources == RHS.CritResources 835 && DemandedResources == RHS.DemandedResources; 836 } 837 bool operator!=(const SchedResourceDelta &RHS) const { 838 return !operator==(RHS); 839 } 840 }; 841 842 /// Store the state used by GenericScheduler heuristics, required for the 843 /// lifetime of one invocation of pickNode(). 844 struct SchedCandidate { 845 CandPolicy Policy; 846 847 // The best SUnit candidate. 848 SUnit *SU; 849 850 // The reason for this candidate. 851 CandReason Reason; 852 853 // Whether this candidate should be scheduled at top/bottom. 854 bool AtTop; 855 856 // Register pressure values for the best candidate. 857 RegPressureDelta RPDelta; 858 859 // Critical resource consumption of the best candidate. 860 SchedResourceDelta ResDelta; 861 SchedCandidateSchedCandidate862 SchedCandidate() { reset(CandPolicy()); } SchedCandidateSchedCandidate863 SchedCandidate(const CandPolicy &Policy) { reset(Policy); } 864 resetSchedCandidate865 void reset(const CandPolicy &NewPolicy) { 866 Policy = NewPolicy; 867 SU = nullptr; 868 Reason = NoCand; 869 AtTop = false; 870 RPDelta = RegPressureDelta(); 871 ResDelta = SchedResourceDelta(); 872 } 873 isValidSchedCandidate874 bool isValid() const { return SU; } 875 876 // Copy the status of another candidate without changing policy. setBestSchedCandidate877 void setBest(SchedCandidate &Best) { 878 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 879 SU = Best.SU; 880 Reason = Best.Reason; 881 AtTop = Best.AtTop; 882 RPDelta = Best.RPDelta; 883 ResDelta = Best.ResDelta; 884 } 885 886 void initResourceDelta(const ScheduleDAGMI *DAG, 887 const TargetSchedModel *SchedModel); 888 }; 889 890 protected: 891 const MachineSchedContext *Context; 892 const TargetSchedModel *SchedModel = nullptr; 893 const TargetRegisterInfo *TRI = nullptr; 894 895 SchedRemainder Rem; 896 GenericSchedulerBase(const MachineSchedContext * C)897 GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {} 898 899 void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, 900 SchedBoundary *OtherZone); 901 902 #ifndef NDEBUG 903 void traceCandidate(const SchedCandidate &Cand); 904 #endif 905 906 private: 907 bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, 908 bool ComputeRemLatency, unsigned &RemLatency) const; 909 }; 910 911 // Utility functions used by heuristics in tryCandidate(). 912 bool tryLess(int TryVal, int CandVal, 913 GenericSchedulerBase::SchedCandidate &TryCand, 914 GenericSchedulerBase::SchedCandidate &Cand, 915 GenericSchedulerBase::CandReason Reason); 916 bool tryGreater(int TryVal, int CandVal, 917 GenericSchedulerBase::SchedCandidate &TryCand, 918 GenericSchedulerBase::SchedCandidate &Cand, 919 GenericSchedulerBase::CandReason Reason); 920 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 921 GenericSchedulerBase::SchedCandidate &Cand, 922 SchedBoundary &Zone); 923 bool tryPressure(const PressureChange &TryP, 924 const PressureChange &CandP, 925 GenericSchedulerBase::SchedCandidate &TryCand, 926 GenericSchedulerBase::SchedCandidate &Cand, 927 GenericSchedulerBase::CandReason Reason, 928 const TargetRegisterInfo *TRI, 929 const MachineFunction &MF); 930 unsigned getWeakLeft(const SUnit *SU, bool isTop); 931 int biasPhysReg(const SUnit *SU, bool isTop); 932 933 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance 934 /// the schedule. 935 class GenericScheduler : public GenericSchedulerBase { 936 public: GenericScheduler(const MachineSchedContext * C)937 GenericScheduler(const MachineSchedContext *C): 938 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), 939 Bot(SchedBoundary::BotQID, "BotQ") {} 940 941 void initPolicy(MachineBasicBlock::iterator Begin, 942 MachineBasicBlock::iterator End, 943 unsigned NumRegionInstrs) override; 944 945 void dumpPolicy() const override; 946 shouldTrackPressure()947 bool shouldTrackPressure() const override { 948 return RegionPolicy.ShouldTrackPressure; 949 } 950 shouldTrackLaneMasks()951 bool shouldTrackLaneMasks() const override { 952 return RegionPolicy.ShouldTrackLaneMasks; 953 } 954 955 void initialize(ScheduleDAGMI *dag) override; 956 957 SUnit *pickNode(bool &IsTopNode) override; 958 959 void schedNode(SUnit *SU, bool IsTopNode) override; 960 releaseTopNode(SUnit * SU)961 void releaseTopNode(SUnit *SU) override { 962 if (SU->isScheduled) 963 return; 964 965 Top.releaseNode(SU, SU->TopReadyCycle); 966 TopCand.SU = nullptr; 967 } 968 releaseBottomNode(SUnit * SU)969 void releaseBottomNode(SUnit *SU) override { 970 if (SU->isScheduled) 971 return; 972 973 Bot.releaseNode(SU, SU->BotReadyCycle); 974 BotCand.SU = nullptr; 975 } 976 977 void registerRoots() override; 978 979 protected: 980 ScheduleDAGMILive *DAG = nullptr; 981 982 MachineSchedPolicy RegionPolicy; 983 984 // State of the top and bottom scheduled instruction boundaries. 985 SchedBoundary Top; 986 SchedBoundary Bot; 987 988 /// Candidate last picked from Top boundary. 989 SchedCandidate TopCand; 990 /// Candidate last picked from Bot boundary. 991 SchedCandidate BotCand; 992 993 void checkAcyclicLatency(); 994 995 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, 996 const RegPressureTracker &RPTracker, 997 RegPressureTracker &TempTracker); 998 999 virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, 1000 SchedBoundary *Zone) const; 1001 1002 SUnit *pickNodeBidirectional(bool &IsTopNode); 1003 1004 void pickNodeFromQueue(SchedBoundary &Zone, 1005 const CandPolicy &ZonePolicy, 1006 const RegPressureTracker &RPTracker, 1007 SchedCandidate &Candidate); 1008 1009 void reschedulePhysReg(SUnit *SU, bool isTop); 1010 }; 1011 1012 /// PostGenericScheduler - Interface to the scheduling algorithm used by 1013 /// ScheduleDAGMI. 1014 /// 1015 /// Callbacks from ScheduleDAGMI: 1016 /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 1017 class PostGenericScheduler : public GenericSchedulerBase { 1018 ScheduleDAGMI *DAG; 1019 SchedBoundary Top; 1020 SmallVector<SUnit*, 8> BotRoots; 1021 1022 public: PostGenericScheduler(const MachineSchedContext * C)1023 PostGenericScheduler(const MachineSchedContext *C): 1024 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} 1025 1026 ~PostGenericScheduler() override = default; 1027 initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)1028 void initPolicy(MachineBasicBlock::iterator Begin, 1029 MachineBasicBlock::iterator End, 1030 unsigned NumRegionInstrs) override { 1031 /* no configurable policy */ 1032 } 1033 1034 /// PostRA scheduling does not track pressure. shouldTrackPressure()1035 bool shouldTrackPressure() const override { return false; } 1036 1037 void initialize(ScheduleDAGMI *Dag) override; 1038 1039 void registerRoots() override; 1040 1041 SUnit *pickNode(bool &IsTopNode) override; 1042 scheduleTree(unsigned SubtreeID)1043 void scheduleTree(unsigned SubtreeID) override { 1044 llvm_unreachable("PostRA scheduler does not support subtree analysis."); 1045 } 1046 1047 void schedNode(SUnit *SU, bool IsTopNode) override; 1048 releaseTopNode(SUnit * SU)1049 void releaseTopNode(SUnit *SU) override { 1050 if (SU->isScheduled) 1051 return; 1052 Top.releaseNode(SU, SU->TopReadyCycle); 1053 } 1054 1055 // Only called for roots. releaseBottomNode(SUnit * SU)1056 void releaseBottomNode(SUnit *SU) override { 1057 BotRoots.push_back(SU); 1058 } 1059 1060 protected: 1061 void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); 1062 1063 void pickNodeFromQueue(SchedCandidate &Cand); 1064 }; 1065 1066 /// Create the standard converging machine scheduler. This will be used as the 1067 /// default scheduler if the target does not set a default. 1068 /// Adds default DAG mutations. 1069 ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); 1070 1071 /// Create a generic scheduler with no vreg liveness or DAG mutation passes. 1072 ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); 1073 1074 std::unique_ptr<ScheduleDAGMutation> 1075 createLoadClusterDAGMutation(const TargetInstrInfo *TII, 1076 const TargetRegisterInfo *TRI); 1077 1078 std::unique_ptr<ScheduleDAGMutation> 1079 createStoreClusterDAGMutation(const TargetInstrInfo *TII, 1080 const TargetRegisterInfo *TRI); 1081 1082 std::unique_ptr<ScheduleDAGMutation> 1083 createCopyConstrainDAGMutation(const TargetInstrInfo *TII, 1084 const TargetRegisterInfo *TRI); 1085 1086 } // end namespace llvm 1087 1088 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H 1089