1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // MachineScheduler schedules machine instructions after phi elimination. It 10 // preserves LiveIntervals so it can be invoked before register allocation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "HexagonMachineScheduler.h" 15 #include "HexagonInstrInfo.h" 16 #include "HexagonSubtarget.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/CodeGen/DFAPacketizer.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineLoopInfo.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/RegisterPressure.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 27 #include "llvm/CodeGen/TargetInstrInfo.h" 28 #include "llvm/CodeGen/TargetOpcodes.h" 29 #include "llvm/CodeGen/TargetRegisterInfo.h" 30 #include "llvm/CodeGen/TargetSchedule.h" 31 #include "llvm/CodeGen/TargetSubtargetInfo.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <algorithm> 37 #include <cassert> 38 #include <iomanip> 39 #include <limits> 40 #include <memory> 41 #include <sstream> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "machine-scheduler" 46 47 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", 48 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 49 50 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", 51 cl::Hidden, cl::ZeroOrMore, cl::init(true)); 52 53 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", 54 cl::Hidden, cl::ZeroOrMore, cl::init(1)); 55 56 // Check if the scheduler should penalize instructions that are available to 57 // early due to a zero-latency dependence. 58 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, 59 cl::ZeroOrMore, cl::init(true)); 60 61 // This value is used to determine if a register class is a high pressure set. 62 // We compute the maximum number of registers needed and divided by the total 63 // available. Then, we compare the result to this value. 64 static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden, 65 cl::init(0.75f), cl::desc("High register pressure threhold.")); 66 67 /// Return true if there is a dependence between SUd and SUu. 68 static bool hasDependence(const SUnit *SUd, const SUnit *SUu, 69 const HexagonInstrInfo &QII) { 70 if (SUd->Succs.size() == 0) 71 return false; 72 73 // Enable .cur formation. 74 if (QII.mayBeCurLoad(*SUd->getInstr())) 75 return false; 76 77 if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr())) 78 return false; 79 80 for (const auto &S : SUd->Succs) { 81 // Since we do not add pseudos to packets, might as well 82 // ignore order dependencies. 83 if (S.isCtrl()) 84 continue; 85 86 if (S.getSUnit() == SUu && S.getLatency() > 0) 87 return true; 88 } 89 return false; 90 } 91 92 /// Check if scheduling of this SU is possible 93 /// in the current packet. 94 /// It is _not_ precise (statefull), it is more like 95 /// another heuristic. Many corner cases are figured 96 /// empirically. 97 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { 98 if (!SU || !SU->getInstr()) 99 return false; 100 101 // First see if the pipeline could receive this instruction 102 // in the current cycle. 103 switch (SU->getInstr()->getOpcode()) { 104 default: 105 if (!ResourcesModel->canReserveResources(*SU->getInstr())) 106 return false; 107 break; 108 case TargetOpcode::EXTRACT_SUBREG: 109 case TargetOpcode::INSERT_SUBREG: 110 case TargetOpcode::SUBREG_TO_REG: 111 case TargetOpcode::REG_SEQUENCE: 112 case TargetOpcode::IMPLICIT_DEF: 113 case TargetOpcode::COPY: 114 case TargetOpcode::INLINEASM: 115 case TargetOpcode::INLINEASM_BR: 116 break; 117 } 118 119 MachineBasicBlock *MBB = SU->getInstr()->getParent(); 120 auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>(); 121 const auto &QII = *QST.getInstrInfo(); 122 123 // Now see if there are no other dependencies to instructions already 124 // in the packet. 125 if (IsTop) { 126 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 127 if (hasDependence(Packet[i], SU, QII)) 128 return false; 129 } else { 130 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 131 if (hasDependence(SU, Packet[i], QII)) 132 return false; 133 } 134 return true; 135 } 136 137 /// Keep track of available resources. 138 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { 139 bool startNewCycle = false; 140 // Artificially reset state. 141 if (!SU) { 142 ResourcesModel->clearResources(); 143 Packet.clear(); 144 TotalPackets++; 145 return false; 146 } 147 // If this SU does not fit in the packet or the packet is now full 148 // start a new one. 149 if (!isResourceAvailable(SU, IsTop) || 150 Packet.size() >= SchedModel->getIssueWidth()) { 151 ResourcesModel->clearResources(); 152 Packet.clear(); 153 TotalPackets++; 154 startNewCycle = true; 155 } 156 157 switch (SU->getInstr()->getOpcode()) { 158 default: 159 ResourcesModel->reserveResources(*SU->getInstr()); 160 break; 161 case TargetOpcode::EXTRACT_SUBREG: 162 case TargetOpcode::INSERT_SUBREG: 163 case TargetOpcode::SUBREG_TO_REG: 164 case TargetOpcode::REG_SEQUENCE: 165 case TargetOpcode::IMPLICIT_DEF: 166 case TargetOpcode::KILL: 167 case TargetOpcode::CFI_INSTRUCTION: 168 case TargetOpcode::EH_LABEL: 169 case TargetOpcode::COPY: 170 case TargetOpcode::INLINEASM: 171 case TargetOpcode::INLINEASM_BR: 172 break; 173 } 174 Packet.push_back(SU); 175 176 #ifndef NDEBUG 177 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); 178 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 179 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); 180 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); 181 LLVM_DEBUG(Packet[i]->getInstr()->dump()); 182 } 183 #endif 184 185 return startNewCycle; 186 } 187 188 /// schedule - Called back from MachineScheduler::runOnMachineFunction 189 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 190 /// only includes instructions that have DAG nodes, not scheduling boundaries. 191 void VLIWMachineScheduler::schedule() { 192 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " 193 << printMBBReference(*BB) << " " << BB->getName() 194 << " in_func " << BB->getParent()->getName() 195 << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); 196 197 buildDAGWithRegPressure(); 198 199 Topo.InitDAGTopologicalSorting(); 200 201 // Postprocess the DAG to add platform-specific artificial dependencies. 202 postprocessDAG(); 203 204 SmallVector<SUnit*, 8> TopRoots, BotRoots; 205 findRootsAndBiasEdges(TopRoots, BotRoots); 206 207 // Initialize the strategy before modifying the DAG. 208 SchedImpl->initialize(this); 209 210 LLVM_DEBUG(unsigned maxH = 0; 211 for (unsigned su = 0, e = SUnits.size(); su != e; 212 ++su) if (SUnits[su].getHeight() > maxH) maxH = 213 SUnits[su].getHeight(); 214 dbgs() << "Max Height " << maxH << "\n";); 215 LLVM_DEBUG(unsigned maxD = 0; 216 for (unsigned su = 0, e = SUnits.size(); su != e; 217 ++su) if (SUnits[su].getDepth() > maxD) maxD = 218 SUnits[su].getDepth(); 219 dbgs() << "Max Depth " << maxD << "\n";); 220 LLVM_DEBUG(dump()); 221 222 initQueues(TopRoots, BotRoots); 223 224 bool IsTopNode = false; 225 while (true) { 226 LLVM_DEBUG( 227 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); 228 SUnit *SU = SchedImpl->pickNode(IsTopNode); 229 if (!SU) break; 230 231 if (!checkSchedLimit()) 232 break; 233 234 scheduleMI(SU, IsTopNode); 235 236 // Notify the scheduling strategy after updating the DAG. 237 SchedImpl->schedNode(SU, IsTopNode); 238 239 updateQueues(SU, IsTopNode); 240 } 241 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 242 243 placeDebugValues(); 244 245 LLVM_DEBUG({ 246 dbgs() << "*** Final schedule for " 247 << printMBBReference(*begin()->getParent()) << " ***\n"; 248 dumpSchedule(); 249 dbgs() << '\n'; 250 }); 251 } 252 253 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { 254 DAG = static_cast<VLIWMachineScheduler*>(dag); 255 SchedModel = DAG->getSchedModel(); 256 257 Top.init(DAG, SchedModel); 258 Bot.init(DAG, SchedModel); 259 260 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 261 // are disabled, then these HazardRecs will be disabled. 262 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); 263 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); 264 const TargetInstrInfo *TII = STI.getInstrInfo(); 265 delete Top.HazardRec; 266 delete Bot.HazardRec; 267 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 268 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 269 270 delete Top.ResourceModel; 271 delete Bot.ResourceModel; 272 Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 273 Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 274 275 const std::vector<unsigned> &MaxPressure = 276 DAG->getRegPressure().MaxSetPressure; 277 HighPressureSets.assign(MaxPressure.size(), 0); 278 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { 279 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); 280 HighPressureSets[i] = 281 ((float) MaxPressure[i] > ((float) Limit * RPThreshold)); 282 } 283 284 assert((!ForceTopDown || !ForceBottomUp) && 285 "-misched-topdown incompatible with -misched-bottomup"); 286 } 287 288 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { 289 for (const SDep &PI : SU->Preds) { 290 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; 291 unsigned MinLatency = PI.getLatency(); 292 #ifndef NDEBUG 293 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 294 #endif 295 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 296 SU->TopReadyCycle = PredReadyCycle + MinLatency; 297 } 298 299 if (!SU->isScheduled) 300 Top.releaseNode(SU, SU->TopReadyCycle); 301 } 302 303 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { 304 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 305 306 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 307 I != E; ++I) { 308 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 309 unsigned MinLatency = I->getLatency(); 310 #ifndef NDEBUG 311 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 312 #endif 313 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 314 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 315 } 316 317 if (!SU->isScheduled) 318 Bot.releaseNode(SU, SU->BotReadyCycle); 319 } 320 321 /// Does this SU have a hazard within the current instruction group. 322 /// 323 /// The scheduler supports two modes of hazard recognition. The first is the 324 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 325 /// supports highly complicated in-order reservation tables 326 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 327 /// 328 /// The second is a streamlined mechanism that checks for hazards based on 329 /// simple counters that the scheduler itself maintains. It explicitly checks 330 /// for instruction dispatch limitations, including the number of micro-ops that 331 /// can dispatch per cycle. 332 /// 333 /// TODO: Also check whether the SU must start a new group. 334 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { 335 if (HazardRec->isEnabled()) 336 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 337 338 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 339 if (IssueCount + uops > SchedModel->getIssueWidth()) 340 return true; 341 342 return false; 343 } 344 345 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, 346 unsigned ReadyCycle) { 347 if (ReadyCycle < MinReadyCycle) 348 MinReadyCycle = ReadyCycle; 349 350 // Check for interlocks first. For the purpose of other heuristics, an 351 // instruction that cannot issue appears as if it's not in the ReadyQueue. 352 if (ReadyCycle > CurrCycle || checkHazard(SU)) 353 354 Pending.push(SU); 355 else 356 Available.push(SU); 357 } 358 359 /// Move the boundary of scheduled code by one cycle. 360 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { 361 unsigned Width = SchedModel->getIssueWidth(); 362 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 363 364 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 365 "MinReadyCycle uninitialized"); 366 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); 367 368 if (!HazardRec->isEnabled()) { 369 // Bypass HazardRec virtual calls. 370 CurrCycle = NextCycle; 371 } else { 372 // Bypass getHazardType calls in case of long latency. 373 for (; CurrCycle != NextCycle; ++CurrCycle) { 374 if (isTop()) 375 HazardRec->AdvanceCycle(); 376 else 377 HazardRec->RecedeCycle(); 378 } 379 } 380 CheckPending = true; 381 382 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " 383 << CurrCycle << '\n'); 384 } 385 386 /// Move the boundary of scheduled code by one SUnit. 387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { 388 bool startNewCycle = false; 389 390 // Update the reservation table. 391 if (HazardRec->isEnabled()) { 392 if (!isTop() && SU->isCall) { 393 // Calls are scheduled with their preceding instructions. For bottom-up 394 // scheduling, clear the pipeline state before emitting. 395 HazardRec->Reset(); 396 } 397 HazardRec->EmitInstruction(SU); 398 } 399 400 // Update DFA model. 401 startNewCycle = ResourceModel->reserveResources(SU, isTop()); 402 403 // Check the instruction group dispatch limit. 404 // TODO: Check if this SU must end a dispatch group. 405 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 406 if (startNewCycle) { 407 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); 408 bumpCycle(); 409 } 410 else 411 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " 412 << CurrCycle << '\n'); 413 } 414 415 /// Release pending ready nodes in to the available queue. This makes them 416 /// visible to heuristics. 417 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { 418 // If the available queue is empty, it is safe to reset MinReadyCycle. 419 if (Available.empty()) 420 MinReadyCycle = std::numeric_limits<unsigned>::max(); 421 422 // Check to see if any of the pending instructions are ready to issue. If 423 // so, add them to the available queue. 424 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 425 SUnit *SU = *(Pending.begin()+i); 426 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 427 428 if (ReadyCycle < MinReadyCycle) 429 MinReadyCycle = ReadyCycle; 430 431 if (ReadyCycle > CurrCycle) 432 continue; 433 434 if (checkHazard(SU)) 435 continue; 436 437 Available.push(SU); 438 Pending.remove(Pending.begin()+i); 439 --i; --e; 440 } 441 CheckPending = false; 442 } 443 444 /// Remove SU from the ready set for this boundary. 445 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { 446 if (Available.isInQueue(SU)) 447 Available.remove(Available.find(SU)); 448 else { 449 assert(Pending.isInQueue(SU) && "bad ready count"); 450 Pending.remove(Pending.find(SU)); 451 } 452 } 453 454 /// If this queue only has one ready candidate, return it. As a side effect, 455 /// advance the cycle until at least one node is ready. If multiple instructions 456 /// are ready, return NULL. 457 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { 458 if (CheckPending) 459 releasePending(); 460 461 auto AdvanceCycle = [this]() { 462 if (Available.empty()) 463 return true; 464 if (Available.size() == 1 && Pending.size() > 0) 465 return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || 466 getWeakLeft(*Available.begin(), isTop()) != 0; 467 return false; 468 }; 469 for (unsigned i = 0; AdvanceCycle(); ++i) { 470 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 471 "permanent hazard"); (void)i; 472 ResourceModel->reserveResources(nullptr, isTop()); 473 bumpCycle(); 474 releasePending(); 475 } 476 if (Available.size() == 1) 477 return *Available.begin(); 478 return nullptr; 479 } 480 481 #ifndef NDEBUG 482 void ConvergingVLIWScheduler::traceCandidate(const char *Label, 483 const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { 484 dbgs() << Label << " " << Q.getName() << " "; 485 if (P.isValid()) 486 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" 487 << P.getUnitInc() << " "; 488 else 489 dbgs() << " "; 490 dbgs() << "cost(" << Cost << ")\t"; 491 DAG->dumpNode(*SU); 492 } 493 494 // Very detailed queue dump, to be used with higher verbosity levels. 495 void ConvergingVLIWScheduler::readyQueueVerboseDump( 496 const RegPressureTracker &RPTracker, SchedCandidate &Candidate, 497 ReadyQueue &Q) { 498 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 499 500 dbgs() << ">>> " << Q.getName() << "\n"; 501 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 502 RegPressureDelta RPDelta; 503 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 504 DAG->getRegionCriticalPSets(), 505 DAG->getRegPressure().MaxSetPressure); 506 std::stringstream dbgstr; 507 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; 508 dbgs() << dbgstr.str(); 509 SchedulingCost(Q, *I, Candidate, RPDelta, true); 510 dbgs() << "\t"; 511 (*I)->getInstr()->dump(); 512 } 513 dbgs() << "\n"; 514 } 515 #endif 516 517 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor 518 /// of SU, return true (we may have duplicates) 519 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { 520 if (SU->NumPredsLeft == 0) 521 return false; 522 523 for (auto &Pred : SU->Preds) { 524 // We found an available, but not scheduled, predecessor. 525 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) 526 return false; 527 } 528 529 return true; 530 } 531 532 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor 533 /// of SU, return true (we may have duplicates) 534 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { 535 if (SU->NumSuccsLeft == 0) 536 return false; 537 538 for (auto &Succ : SU->Succs) { 539 // We found an available, but not scheduled, successor. 540 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) 541 return false; 542 } 543 return true; 544 } 545 546 /// Check if the instruction changes the register pressure of a register in the 547 /// high pressure set. The function returns a negative value if the pressure 548 /// decreases and a positive value is the pressure increases. If the instruction 549 /// doesn't use a high pressure register or doesn't change the register 550 /// pressure, then return 0. 551 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { 552 PressureDiff &PD = DAG->getPressureDiff(SU); 553 for (auto &P : PD) { 554 if (!P.isValid()) 555 continue; 556 // The pressure differences are computed bottom-up, so the comparision for 557 // an increase is positive in the bottom direction, but negative in the 558 // top-down direction. 559 if (HighPressureSets[P.getPSet()]) 560 return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); 561 } 562 return 0; 563 } 564 565 // Constants used to denote relative importance of 566 // heuristic components for cost computation. 567 static const unsigned PriorityOne = 200; 568 static const unsigned PriorityTwo = 50; 569 static const unsigned PriorityThree = 75; 570 static const unsigned ScaleTwo = 10; 571 572 /// Single point to compute overall scheduling cost. 573 /// TODO: More heuristics will be used soon. 574 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, 575 SchedCandidate &Candidate, 576 RegPressureDelta &Delta, 577 bool verbose) { 578 // Initial trivial priority. 579 int ResCount = 1; 580 581 // Do not waste time on a node that is already scheduled. 582 if (!SU || SU->isScheduled) 583 return ResCount; 584 585 LLVM_DEBUG(if (verbose) dbgs() 586 << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); 587 // Forced priority is high. 588 if (SU->isScheduleHigh) { 589 ResCount += PriorityOne; 590 LLVM_DEBUG(dbgs() << "H|"); 591 } 592 593 unsigned IsAvailableAmt = 0; 594 // Critical path first. 595 if (Q.getID() == TopQID) { 596 if (Top.isLatencyBound(SU)) { 597 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 598 ResCount += (SU->getHeight() * ScaleTwo); 599 } 600 601 LLVM_DEBUG(if (verbose) { 602 std::stringstream dbgstr; 603 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; 604 dbgs() << dbgstr.str(); 605 }); 606 607 // If resources are available for it, multiply the 608 // chance of scheduling. 609 if (Top.ResourceModel->isResourceAvailable(SU, true)) { 610 IsAvailableAmt = (PriorityTwo + PriorityThree); 611 ResCount += IsAvailableAmt; 612 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 613 } else 614 LLVM_DEBUG(if (verbose) dbgs() << " |"); 615 } else { 616 if (Bot.isLatencyBound(SU)) { 617 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 618 ResCount += (SU->getDepth() * ScaleTwo); 619 } 620 621 LLVM_DEBUG(if (verbose) { 622 std::stringstream dbgstr; 623 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; 624 dbgs() << dbgstr.str(); 625 }); 626 627 // If resources are available for it, multiply the 628 // chance of scheduling. 629 if (Bot.ResourceModel->isResourceAvailable(SU, false)) { 630 IsAvailableAmt = (PriorityTwo + PriorityThree); 631 ResCount += IsAvailableAmt; 632 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 633 } else 634 LLVM_DEBUG(if (verbose) dbgs() << " |"); 635 } 636 637 unsigned NumNodesBlocking = 0; 638 if (Q.getID() == TopQID) { 639 // How many SUs does it block from scheduling? 640 // Look at all of the successors of this node. 641 // Count the number of nodes that 642 // this node is the sole unscheduled node for. 643 if (Top.isLatencyBound(SU)) 644 for (const SDep &SI : SU->Succs) 645 if (isSingleUnscheduledPred(SI.getSUnit(), SU)) 646 ++NumNodesBlocking; 647 } else { 648 // How many unscheduled predecessors block this node? 649 if (Bot.isLatencyBound(SU)) 650 for (const SDep &PI : SU->Preds) 651 if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) 652 ++NumNodesBlocking; 653 } 654 ResCount += (NumNodesBlocking * ScaleTwo); 655 656 LLVM_DEBUG(if (verbose) { 657 std::stringstream dbgstr; 658 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; 659 dbgs() << dbgstr.str(); 660 }); 661 662 // Factor in reg pressure as a heuristic. 663 if (!IgnoreBBRegPressure) { 664 // Decrease priority by the amount that register pressure exceeds the limit. 665 ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); 666 // Decrease priority if register pressure exceeds the limit. 667 ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); 668 // Decrease priority slightly if register pressure would increase over the 669 // current maximum. 670 ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); 671 // If there are register pressure issues, then we remove the value added for 672 // the instruction being available. The rationale is that we really don't 673 // want to schedule an instruction that causes a spill. 674 if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && 675 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || 676 Delta.CurrentMax.getUnitInc())) 677 ResCount -= IsAvailableAmt; 678 LLVM_DEBUG(if (verbose) { 679 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" 680 << Delta.CriticalMax.getUnitInc() << "/" 681 << Delta.CurrentMax.getUnitInc() << ")|"; 682 }); 683 } 684 685 // Give a little extra priority to a .cur instruction if there is a resource 686 // available for it. 687 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); 688 auto &QII = *QST.getInstrInfo(); 689 if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { 690 if (Q.getID() == TopQID && 691 Top.ResourceModel->isResourceAvailable(SU, true)) { 692 ResCount += PriorityTwo; 693 LLVM_DEBUG(if (verbose) dbgs() << "C|"); 694 } else if (Q.getID() == BotQID && 695 Bot.ResourceModel->isResourceAvailable(SU, false)) { 696 ResCount += PriorityTwo; 697 LLVM_DEBUG(if (verbose) dbgs() << "C|"); 698 } 699 } 700 701 // Give preference to a zero latency instruction if the dependent 702 // instruction is in the current packet. 703 if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { 704 for (const SDep &PI : SU->Preds) { 705 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && 706 PI.getLatency() == 0 && 707 Top.ResourceModel->isInPacket(PI.getSUnit())) { 708 ResCount += PriorityThree; 709 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 710 } 711 } 712 } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { 713 for (const SDep &SI : SU->Succs) { 714 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && 715 SI.getLatency() == 0 && 716 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 717 ResCount += PriorityThree; 718 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 719 } 720 } 721 } 722 723 // If the instruction has a non-zero latency dependence with an instruction in 724 // the current packet, then it should not be scheduled yet. The case occurs 725 // when the dependent instruction is scheduled in a new packet, so the 726 // scheduler updates the current cycle and pending instructions become 727 // available. 728 if (CheckEarlyAvail) { 729 if (Q.getID() == TopQID) { 730 for (const auto &PI : SU->Preds) { 731 if (PI.getLatency() > 0 && 732 Top.ResourceModel->isInPacket(PI.getSUnit())) { 733 ResCount -= PriorityOne; 734 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 735 } 736 } 737 } else { 738 for (const auto &SI : SU->Succs) { 739 if (SI.getLatency() > 0 && 740 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 741 ResCount -= PriorityOne; 742 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 743 } 744 } 745 } 746 } 747 748 LLVM_DEBUG(if (verbose) { 749 std::stringstream dbgstr; 750 dbgstr << "Total " << std::setw(4) << ResCount << ")"; 751 dbgs() << dbgstr.str(); 752 }); 753 754 return ResCount; 755 } 756 757 /// Pick the best candidate from the top queue. 758 /// 759 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 760 /// DAG building. To adjust for the current scheduling location we need to 761 /// maintain the number of vreg uses remaining to be top-scheduled. 762 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: 763 pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker, 764 SchedCandidate &Candidate) { 765 ReadyQueue &Q = Zone.Available; 766 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) 767 readyQueueVerboseDump(RPTracker, Candidate, Q); 768 else Q.dump();); 769 770 // getMaxPressureDelta temporarily modifies the tracker. 771 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 772 773 // BestSU remains NULL if no top candidates beat the best existing candidate. 774 CandResult FoundCandidate = NoCand; 775 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 776 RegPressureDelta RPDelta; 777 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 778 DAG->getRegionCriticalPSets(), 779 DAG->getRegPressure().MaxSetPressure); 780 781 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); 782 783 // Initialize the candidate if needed. 784 if (!Candidate.SU) { 785 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); 786 Candidate.SU = *I; 787 Candidate.RPDelta = RPDelta; 788 Candidate.SCost = CurrentCost; 789 FoundCandidate = NodeOrder; 790 continue; 791 } 792 793 // Choose node order for negative cost candidates. There is no good 794 // candidate in this case. 795 if (CurrentCost < 0 && Candidate.SCost < 0) { 796 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) 797 || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 798 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); 799 Candidate.SU = *I; 800 Candidate.RPDelta = RPDelta; 801 Candidate.SCost = CurrentCost; 802 FoundCandidate = NodeOrder; 803 } 804 continue; 805 } 806 807 // Best cost. 808 if (CurrentCost > Candidate.SCost) { 809 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); 810 Candidate.SU = *I; 811 Candidate.RPDelta = RPDelta; 812 Candidate.SCost = CurrentCost; 813 FoundCandidate = BestCost; 814 continue; 815 } 816 817 // Choose an instruction that does not depend on an artificial edge. 818 unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); 819 unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); 820 if (CurrWeak != CandWeak) { 821 if (CurrWeak < CandWeak) { 822 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); 823 Candidate.SU = *I; 824 Candidate.RPDelta = RPDelta; 825 Candidate.SCost = CurrentCost; 826 FoundCandidate = Weak; 827 } 828 continue; 829 } 830 831 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { 832 unsigned CurrSize, CandSize; 833 if (Q.getID() == TopQID) { 834 CurrSize = (*I)->Succs.size(); 835 CandSize = Candidate.SU->Succs.size(); 836 } else { 837 CurrSize = (*I)->Preds.size(); 838 CandSize = Candidate.SU->Preds.size(); 839 } 840 if (CurrSize > CandSize) { 841 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); 842 Candidate.SU = *I; 843 Candidate.RPDelta = RPDelta; 844 Candidate.SCost = CurrentCost; 845 FoundCandidate = BestCost; 846 } 847 // Keep the old candidate if it's a better candidate. That is, don't use 848 // the subsequent tie breaker. 849 if (CurrSize != CandSize) 850 continue; 851 } 852 853 // Tie breaker. 854 // To avoid scheduling indeterminism, we need a tie breaker 855 // for the case when cost is identical for two nodes. 856 if (UseNewerCandidate && CurrentCost == Candidate.SCost) { 857 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) 858 || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 859 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); 860 Candidate.SU = *I; 861 Candidate.RPDelta = RPDelta; 862 Candidate.SCost = CurrentCost; 863 FoundCandidate = NodeOrder; 864 continue; 865 } 866 } 867 868 // Fall through to original instruction order. 869 // Only consider node order if Candidate was chosen from this Q. 870 if (FoundCandidate == NoCand) 871 continue; 872 } 873 return FoundCandidate; 874 } 875 876 /// Pick the best candidate node from either the top or bottom queue. 877 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { 878 // Schedule as far as possible in the direction of no choice. This is most 879 // efficient, but also provides the best heuristics for CriticalPSets. 880 if (SUnit *SU = Bot.pickOnlyChoice()) { 881 LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); 882 IsTopNode = false; 883 return SU; 884 } 885 if (SUnit *SU = Top.pickOnlyChoice()) { 886 LLVM_DEBUG(dbgs() << "Picked only Top\n"); 887 IsTopNode = true; 888 return SU; 889 } 890 SchedCandidate BotCand; 891 // Prefer bottom scheduling when heuristics are silent. 892 CandResult BotResult = pickNodeFromQueue(Bot, 893 DAG->getBotRPTracker(), BotCand); 894 assert(BotResult != NoCand && "failed to find the first candidate"); 895 896 // If either Q has a single candidate that provides the least increase in 897 // Excess pressure, we can immediately schedule from that Q. 898 // 899 // RegionCriticalPSets summarizes the pressure within the scheduled region and 900 // affects picking from either Q. If scheduling in one direction must 901 // increase pressure for one of the excess PSets, then schedule in that 902 // direction first to provide more freedom in the other direction. 903 if (BotResult == SingleExcess || BotResult == SingleCritical) { 904 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); 905 IsTopNode = false; 906 return BotCand.SU; 907 } 908 // Check if the top Q has a better candidate. 909 SchedCandidate TopCand; 910 CandResult TopResult = pickNodeFromQueue(Top, 911 DAG->getTopRPTracker(), TopCand); 912 assert(TopResult != NoCand && "failed to find the first candidate"); 913 914 if (TopResult == SingleExcess || TopResult == SingleCritical) { 915 LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); 916 IsTopNode = true; 917 return TopCand.SU; 918 } 919 // If either Q has a single candidate that minimizes pressure above the 920 // original region's pressure pick it. 921 if (BotResult == SingleMax) { 922 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); 923 IsTopNode = false; 924 return BotCand.SU; 925 } 926 if (TopResult == SingleMax) { 927 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); 928 IsTopNode = true; 929 return TopCand.SU; 930 } 931 if (TopCand.SCost > BotCand.SCost) { 932 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); 933 IsTopNode = true; 934 return TopCand.SU; 935 } 936 // Otherwise prefer the bottom candidate in node order. 937 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); 938 IsTopNode = false; 939 return BotCand.SU; 940 } 941 942 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 943 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { 944 if (DAG->top() == DAG->bottom()) { 945 assert(Top.Available.empty() && Top.Pending.empty() && 946 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 947 return nullptr; 948 } 949 SUnit *SU; 950 if (ForceTopDown) { 951 SU = Top.pickOnlyChoice(); 952 if (!SU) { 953 SchedCandidate TopCand; 954 CandResult TopResult = 955 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 956 assert(TopResult != NoCand && "failed to find the first candidate"); 957 (void)TopResult; 958 SU = TopCand.SU; 959 } 960 IsTopNode = true; 961 } else if (ForceBottomUp) { 962 SU = Bot.pickOnlyChoice(); 963 if (!SU) { 964 SchedCandidate BotCand; 965 CandResult BotResult = 966 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 967 assert(BotResult != NoCand && "failed to find the first candidate"); 968 (void)BotResult; 969 SU = BotCand.SU; 970 } 971 IsTopNode = false; 972 } else { 973 SU = pickNodeBidrectional(IsTopNode); 974 } 975 if (SU->isTopReady()) 976 Top.removeReady(SU); 977 if (SU->isBottomReady()) 978 Bot.removeReady(SU); 979 980 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") 981 << " Scheduling instruction in cycle " 982 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" 983 << reportPackets() << ")\n"; 984 DAG->dumpNode(*SU)); 985 return SU; 986 } 987 988 /// Update the scheduler's state after scheduling a node. This is the same node 989 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs 990 /// to update it's state based on the current cycle before MachineSchedStrategy 991 /// does. 992 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { 993 if (IsTopNode) { 994 Top.bumpNode(SU); 995 SU->TopReadyCycle = Top.CurrCycle; 996 } else { 997 Bot.bumpNode(SU); 998 SU->BotReadyCycle = Bot.CurrCycle; 999 } 1000 } 1001