1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI Machine Scheduler interface 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 17 18 #include "SIInstrInfo.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineScheduler.h" 21 #include "llvm/CodeGen/RegisterPressure.h" 22 #include "llvm/CodeGen/ScheduleDAG.h" 23 #include <cassert> 24 #include <cstdint> 25 #include <map> 26 #include <memory> 27 #include <set> 28 #include <vector> 29 30 namespace llvm { 31 32 enum SIScheduleCandReason { 33 NoCand, 34 RegUsage, 35 Latency, 36 Successor, 37 Depth, 38 NodeOrder 39 }; 40 41 struct SISchedulerCandidate { 42 // The reason for this candidate. 43 SIScheduleCandReason Reason; 44 45 // Set of reasons that apply to multiple candidates. 46 uint32_t RepeatReasonSet; 47 48 SISchedulerCandidate() 49 : Reason(NoCand), RepeatReasonSet(0) {} 50 51 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } 52 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } 53 }; 54 55 class SIScheduleDAGMI; 56 class SIScheduleBlockCreator; 57 58 class SIScheduleBlock { 59 SIScheduleDAGMI *DAG; 60 SIScheduleBlockCreator *BC; 61 62 std::vector<SUnit*> SUnits; 63 std::map<unsigned, unsigned> NodeNum2Index; 64 std::vector<SUnit*> TopReadySUs; 65 std::vector<SUnit*> ScheduledSUnits; 66 67 /// The top of the unscheduled zone. 68 IntervalPressure TopPressure; 69 RegPressureTracker TopRPTracker; 70 71 // Pressure: number of said class of registers needed to 72 // store the live virtual and real registers. 73 // We do care only of SGPR32 and VGPR32 and do track only virtual registers. 74 // Pressure of additional registers required inside the block. 75 std::vector<unsigned> InternalAdditionnalPressure; 76 // Pressure of input and output registers 77 std::vector<unsigned> LiveInPressure; 78 std::vector<unsigned> LiveOutPressure; 79 // Registers required by the block, and outputs. 80 // We do track only virtual registers. 81 // Note that some registers are not 32 bits, 82 // and thus the pressure is not equal 83 // to the number of live registers. 84 std::set<unsigned> LiveInRegs; 85 std::set<unsigned> LiveOutRegs; 86 87 bool Scheduled; 88 bool HighLatencyBlock; 89 90 std::vector<unsigned> HasLowLatencyNonWaitedParent; 91 92 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table. 93 unsigned ID; 94 95 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors. 96 std::vector<SIScheduleBlock*> Succs; // All blocks successors. 97 unsigned NumHighLatencySuccessors; 98 99 public: 100 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, 101 unsigned ID): 102 DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false), 103 HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {} 104 105 ~SIScheduleBlock() = default; 106 107 unsigned getID() const { return ID; } 108 109 /// Functions for Block construction. 110 void addUnit(SUnit *SU); 111 112 // When all SUs have been added. 113 void finalizeUnits(); 114 115 // Add block pred, which has instruction predecessor of SU. 116 void addPred(SIScheduleBlock *Pred); 117 void addSucc(SIScheduleBlock *Succ); 118 119 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; } 120 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; } 121 122 unsigned Height; // Maximum topdown path length to block without outputs 123 unsigned Depth; // Maximum bottomup path length to block without inputs 124 125 unsigned getNumHighLatencySuccessors() const { 126 return NumHighLatencySuccessors; 127 } 128 129 bool isHighLatencyBlock() { return HighLatencyBlock; } 130 131 // This is approximative. 132 // Ideally should take into accounts some instructions (rcp, etc) 133 // are 4 times slower. 134 int getCost() { return SUnits.size(); } 135 136 // The block Predecessors and Successors must be all registered 137 // before fastSchedule(). 138 // Fast schedule with no particular requirement. 139 void fastSchedule(); 140 141 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; } 142 143 // Complete schedule that will try to minimize reg pressure and 144 // low latencies, and will fill liveins and liveouts. 145 // Needs all MIs to be grouped between BeginBlock and EndBlock. 146 // The MIs can be moved after the scheduling, 147 // it is just used to allow correct track of live registers. 148 void schedule(MachineBasicBlock::iterator BeginBlock, 149 MachineBasicBlock::iterator EndBlock); 150 151 bool isScheduled() { return Scheduled; } 152 153 // Needs the block to be scheduled inside 154 // TODO: find a way to compute it. 155 std::vector<unsigned> &getInternalAdditionnalRegUsage() { 156 return InternalAdditionnalPressure; 157 } 158 159 std::set<unsigned> &getInRegs() { return LiveInRegs; } 160 std::set<unsigned> &getOutRegs() { return LiveOutRegs; } 161 162 void printDebug(bool Full); 163 164 private: 165 struct SISchedCandidate : SISchedulerCandidate { 166 // The best SUnit candidate. 167 SUnit *SU = nullptr; 168 169 unsigned SGPRUsage; 170 unsigned VGPRUsage; 171 bool IsLowLatency; 172 unsigned LowLatencyOffset; 173 bool HasLowLatencyNonWaitedParent; 174 175 SISchedCandidate() = default; 176 177 bool isValid() const { return SU; } 178 179 // Copy the status of another candidate without changing policy. 180 void setBest(SISchedCandidate &Best) { 181 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 182 SU = Best.SU; 183 Reason = Best.Reason; 184 SGPRUsage = Best.SGPRUsage; 185 VGPRUsage = Best.VGPRUsage; 186 IsLowLatency = Best.IsLowLatency; 187 LowLatencyOffset = Best.LowLatencyOffset; 188 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; 189 } 190 }; 191 192 void undoSchedule(); 193 194 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); 195 void releaseSucc(SUnit *SU, SDep *SuccEdge); 196 // InOrOutBlock: restrict to links pointing inside the block (true), 197 // or restrict to links pointing outside the block (false). 198 void releaseSuccessors(SUnit *SU, bool InOrOutBlock); 199 200 void nodeScheduled(SUnit *SU); 201 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); 202 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); 203 SUnit* pickNode(); 204 void traceCandidate(const SISchedCandidate &Cand); 205 void initRegPressure(MachineBasicBlock::iterator BeginBlock, 206 MachineBasicBlock::iterator EndBlock); 207 }; 208 209 struct SIScheduleBlocks { 210 std::vector<SIScheduleBlock*> Blocks; 211 std::vector<int> TopDownIndex2Block; 212 std::vector<int> TopDownBlock2Index; 213 }; 214 215 enum SISchedulerBlockCreatorVariant { 216 LatenciesAlone, 217 LatenciesGrouped, 218 LatenciesAlonePlusConsecutive 219 }; 220 221 class SIScheduleBlockCreator { 222 SIScheduleDAGMI *DAG; 223 // unique_ptr handles freeing memory for us. 224 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; 225 std::map<SISchedulerBlockCreatorVariant, 226 SIScheduleBlocks> Blocks; 227 std::vector<SIScheduleBlock*> CurrentBlocks; 228 std::vector<int> Node2CurrentBlock; 229 230 // Topological sort 231 // Maps topological index to the node number. 232 std::vector<int> TopDownIndex2Block; 233 std::vector<int> TopDownBlock2Index; 234 std::vector<int> BottomUpIndex2Block; 235 236 // 0 -> Color not given. 237 // 1 to SUnits.size() -> Reserved group (you should only add elements to them). 238 // Above -> Other groups. 239 int NextReservedID; 240 int NextNonReservedID; 241 std::vector<int> CurrentColoring; 242 std::vector<int> CurrentTopDownReservedDependencyColoring; 243 std::vector<int> CurrentBottomUpReservedDependencyColoring; 244 245 public: 246 SIScheduleBlockCreator(SIScheduleDAGMI *DAG); 247 ~SIScheduleBlockCreator(); 248 249 SIScheduleBlocks 250 getBlocks(SISchedulerBlockCreatorVariant BlockVariant); 251 252 bool isSUInBlock(SUnit *SU, unsigned ID); 253 254 private: 255 // Give a Reserved color to every high latency. 256 void colorHighLatenciesAlone(); 257 258 // Create groups of high latencies with a Reserved color. 259 void colorHighLatenciesGroups(); 260 261 // Compute coloring for topdown and bottom traversals with 262 // different colors depending on dependencies on Reserved colors. 263 void colorComputeReservedDependencies(); 264 265 // Give color to all non-colored SUs according to Reserved groups dependencies. 266 void colorAccordingToReservedDependencies(); 267 268 // Divides Blocks having no bottom up or top down dependencies on Reserved groups. 269 // The new colors are computed according to the dependencies on the other blocks 270 // formed with colorAccordingToReservedDependencies. 271 void colorEndsAccordingToDependencies(); 272 273 // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 274 void colorForceConsecutiveOrderInGroup(); 275 276 // Merge Constant loads that have all their users into another group to the group. 277 // (TODO: else if all their users depend on the same group, put them there) 278 void colorMergeConstantLoadsNextGroup(); 279 280 // Merge SUs that have all their users into another group to the group 281 void colorMergeIfPossibleNextGroup(); 282 283 // Merge SUs that have all their users into another group to the group, 284 // but only for Reserved groups. 285 void colorMergeIfPossibleNextGroupOnlyForReserved(); 286 287 // Merge SUs that have all their users into another group to the group, 288 // but only if the group is no more than a few SUs. 289 void colorMergeIfPossibleSmallGroupsToNextGroup(); 290 291 // Divides Blocks with important size. 292 // Idea of implementation: attribute new colors depending on topdown and 293 // bottom up links to other blocks. 294 void cutHugeBlocks(); 295 296 // Put in one group all instructions with no users in this scheduling region 297 // (we'd want these groups be at the end). 298 void regroupNoUserInstructions(); 299 300 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); 301 302 void topologicalSort(); 303 304 void scheduleInsideBlocks(); 305 306 void fillStats(); 307 }; 308 309 enum SISchedulerBlockSchedulerVariant { 310 BlockLatencyRegUsage, 311 BlockRegUsageLatency, 312 BlockRegUsage 313 }; 314 315 class SIScheduleBlockScheduler { 316 SIScheduleDAGMI *DAG; 317 SISchedulerBlockSchedulerVariant Variant; 318 std::vector<SIScheduleBlock*> Blocks; 319 320 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages; 321 std::set<unsigned> LiveRegs; 322 // Num of schedulable unscheduled blocks reading the register. 323 std::map<unsigned, unsigned> LiveRegsConsumers; 324 325 std::vector<unsigned> LastPosHighLatencyParentScheduled; 326 int LastPosWaitedHighLatency; 327 328 std::vector<SIScheduleBlock*> BlocksScheduled; 329 unsigned NumBlockScheduled; 330 std::vector<SIScheduleBlock*> ReadyBlocks; 331 332 unsigned VregCurrentUsage; 333 unsigned SregCurrentUsage; 334 335 // Currently is only approximation. 336 unsigned maxVregUsage; 337 unsigned maxSregUsage; 338 339 std::vector<unsigned> BlockNumPredsLeft; 340 std::vector<unsigned> BlockNumSuccsLeft; 341 342 public: 343 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 344 SISchedulerBlockSchedulerVariant Variant, 345 SIScheduleBlocks BlocksStruct); 346 ~SIScheduleBlockScheduler() = default; 347 348 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; } 349 350 unsigned getVGPRUsage() { return maxVregUsage; } 351 unsigned getSGPRUsage() { return maxSregUsage; } 352 353 private: 354 struct SIBlockSchedCandidate : SISchedulerCandidate { 355 // The best Block candidate. 356 SIScheduleBlock *Block = nullptr; 357 358 bool IsHighLatency; 359 int VGPRUsageDiff; 360 unsigned NumSuccessors; 361 unsigned NumHighLatencySuccessors; 362 unsigned LastPosHighLatParentScheduled; 363 unsigned Height; 364 365 SIBlockSchedCandidate() = default; 366 367 bool isValid() const { return Block; } 368 369 // Copy the status of another candidate without changing policy. 370 void setBest(SIBlockSchedCandidate &Best) { 371 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 372 Block = Best.Block; 373 Reason = Best.Reason; 374 IsHighLatency = Best.IsHighLatency; 375 VGPRUsageDiff = Best.VGPRUsageDiff; 376 NumSuccessors = Best.NumSuccessors; 377 NumHighLatencySuccessors = Best.NumHighLatencySuccessors; 378 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; 379 Height = Best.Height; 380 } 381 }; 382 383 bool tryCandidateLatency(SIBlockSchedCandidate &Cand, 384 SIBlockSchedCandidate &TryCand); 385 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 386 SIBlockSchedCandidate &TryCand); 387 SIScheduleBlock *pickBlock(); 388 389 void addLiveRegs(std::set<unsigned> &Regs); 390 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); 391 void releaseBlockSuccs(SIScheduleBlock *Parent); 392 void blockScheduled(SIScheduleBlock *Block); 393 394 // Check register pressure change 395 // by scheduling a block with these LiveIn and LiveOut. 396 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, 397 std::set<unsigned> &OutRegs); 398 399 void schedule(); 400 }; 401 402 struct SIScheduleBlockResult { 403 std::vector<unsigned> SUs; 404 unsigned MaxSGPRUsage; 405 unsigned MaxVGPRUsage; 406 }; 407 408 class SIScheduler { 409 SIScheduleDAGMI *DAG; 410 SIScheduleBlockCreator BlockCreator; 411 412 public: 413 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {} 414 415 ~SIScheduler() = default; 416 417 struct SIScheduleBlockResult 418 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 419 SISchedulerBlockSchedulerVariant ScheduleVariant); 420 }; 421 422 class SIScheduleDAGMI final : public ScheduleDAGMILive { 423 const SIInstrInfo *SITII; 424 const SIRegisterInfo *SITRI; 425 426 std::vector<SUnit> SUnitsLinksBackup; 427 428 // For moveLowLatencies. After all Scheduling variants are tested. 429 std::vector<unsigned> ScheduledSUnits; 430 std::vector<unsigned> ScheduledSUnitsInv; 431 432 unsigned VGPRSetID; 433 unsigned SGPRSetID; 434 435 public: 436 SIScheduleDAGMI(MachineSchedContext *C); 437 438 ~SIScheduleDAGMI() override; 439 440 // Entry point for the schedule. 441 void schedule() override; 442 443 // To init Block's RPTracker. 444 void initRPTracker(RegPressureTracker &RPTracker) { 445 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); 446 } 447 448 MachineBasicBlock *getBB() { return BB; } 449 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; } 450 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; } 451 LiveIntervals *getLIS() { return LIS; } 452 MachineRegisterInfo *getMRI() { return &MRI; } 453 const TargetRegisterInfo *getTRI() { return TRI; } 454 SUnit& getEntrySU() { return EntrySU; } 455 SUnit& getExitSU() { return ExitSU; } 456 457 void restoreSULinksLeft(); 458 459 template<typename _Iterator> void fillVgprSgprCost(_Iterator First, 460 _Iterator End, 461 unsigned &VgprUsage, 462 unsigned &SgprUsage); 463 464 std::set<unsigned> getInRegs() { 465 std::set<unsigned> InRegs; 466 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 467 InRegs.insert(RegMaskPair.RegUnit); 468 } 469 return InRegs; 470 } 471 472 unsigned getVGPRSetID() const { return VGPRSetID; } 473 unsigned getSGPRSetID() const { return SGPRSetID; } 474 475 private: 476 void topologicalSort(); 477 // After scheduling is done, improve low latency placements. 478 void moveLowLatencies(); 479 480 public: 481 // Some stats for scheduling inside blocks. 482 std::vector<unsigned> IsLowLatencySU; 483 std::vector<unsigned> LowLatencyOffset; 484 std::vector<unsigned> IsHighLatencySU; 485 // Topological sort 486 // Maps topological index to the node number. 487 std::vector<int> TopDownIndex2SU; 488 std::vector<int> BottomUpIndex2SU; 489 }; 490 491 } // end namespace llvm 492 493 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 494