//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
                                         cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
                                       cl::ZeroOrMore, cl::init(true));

static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
                                                cl::Hidden, cl::ZeroOrMore,
                                                cl::init(1));

// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
                                     cl::ZeroOrMore, cl::init(true));

// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide it by the total
// available. Then, we compare the result to this value.
static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
                                  cl::init(0.75f),
                                  cl::desc("High register pressure threshold."));

VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
                                     const TargetSchedModel *SM)
    : TII(STI.getInstrInfo()), SchedModel(SM) {
  ResourcesModel = createPacketizer(STI);

  // This hard requirement could be relaxed,
  // but for now do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  Packet.reserve(SchedModel->getIssueWidth());
  Packet.clear();
  ResourcesModel->clearResources();
}

void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}

VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }

/// Return true if there is a dependence between SUd and SUu.
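/// Only data dependences with nonzero latency count here: order (control)
/// edges are ignored because pseudos are never added to packets, and a
/// zero-latency data edge still allows the producer and consumer to share a
/// packet.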
bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  if (SUd->Succs.size() == 0)
    return false;

  for (const auto &S : SUd->Succs) {
    // Since we do not add pseudos to packets, might as well
    // ignore order dependencies.
    if (S.isCtrl())
      continue;

    if (S.getSUnit() == SUu && S.getLatency() > 0)
      return true;
  }
  return false;
}

/// Check if scheduling of this SU is possible
/// in the current packet.
/// This check is _not_ precise (the model is stateful); it is more like
/// another heuristic. Many corner cases have been worked out
/// empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(*SU->getInstr()))
      return false;
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }

  // Now see if there are no other dependencies to instructions already
  // in the packet.
  if (IsTop) {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(Packet[i], SU))
        return false;
  } else {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(SU, Packet[i]))
        return false;
  }
  return true;
}

/// Keep track of available resources.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}

DFAPacketizer *
VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  return STI.getInstrInfo()->CreateTargetScheduleState(STI);
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
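/// The loop below alternates between the top and bottom scheduling boundaries:
/// it asks the strategy for the next node via pickNode(), moves the
/// instruction with scheduleMI(), and then notifies the strategy through
/// schedNode() so it can advance its cycle and packet state, until both zones
/// meet.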
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  LLVM_DEBUG(unsigned maxH = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getHeight() > maxH) maxH =
                 SUnits[su].getHeight();
             dbgs() << "Max Height " << maxH << "\n";);
  LLVM_DEBUG(unsigned maxD = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getDepth() > maxD) maxD =
                 SUnits[su].getDepth();
             dbgs() << "Max Depth " << maxD << "\n";);
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
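  // initialize() runs once per scheduling region (it is invoked from
  // VLIWMachineScheduler::schedule() above), so drop any recognizers and
  // resource models left over from the previous region before creating fresh
  // ones.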
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), 0);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  for (const SDep &PI : SU->Preds) {
    unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
    unsigned MinLatency = PI.getLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
       ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Bot.releaseNode(SU, SU->BotReadyCycle);
}

ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops
/// that can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
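// Note: for this VLIW strategy, packet-level legality is modeled separately by
// VLIWResourceModel; the check below only consults the target hazard
// recognizer (when enabled) and the remaining issue width in micro-ops.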
bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if (IssueCount + uops > SchedModel->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
    SUnit *SU, unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
         "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
                    << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple
/// instructions are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}

// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
#endif

/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  if (SU->NumPredsLeft == 0)
    return false;

  for (auto &Pred : SU->Preds) {
    // We found an available, but not scheduled, predecessor.
    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
      return false;
  }

  return true;
}

/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  if (SU->NumSuccsLeft == 0)
    return false;

  for (auto &Succ : SU->Succs) {
    // We found an available, but not scheduled, successor.
    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
      return false;
  }
  return true;
}

/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value if the pressure increases. If the
/// instruction doesn't use a high pressure register or doesn't change the
/// register pressure, then return 0.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  PressureDiff &PD = DAG->getPressureDiff(SU);
  for (auto &P : PD) {
    if (!P.isValid())
      continue;
    // The pressure differences are computed bottom-up, so the comparison for
    // an increase is positive in the bottom direction, but negative in the
    // top-down direction.
    if (HighPressureSets[P.getPSet()])
      return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  }
  return 0;
}

/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the
    // limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added
    // for the instruction being available. The rationale is that we really
    // don't want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction
  // in the current packet, then it should not be scheduled yet. The case
  // occurs when the dependent instruction is scheduled in a new packet, so
  // the scheduler updates the current cycle and pending instructions become
  // available.
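  // For example, if an instruction already placed in the current packet feeds
  // this SU over a latency-2 edge, this SU cannot usefully issue in the
  // current cycle; the penalty below steers the pick toward independent work
  // until the cycle advances.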
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit
/// during DAG building. To adjust for the current scheduling location we
/// need to maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
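    // getWeakLeft() reports how many weak edges (e.g. artificial or cluster
    // edges) on the relevant side are still unscheduled; preferring the node
    // with fewer of them leaves those advisory orderings easier to honor.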
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Top\n");
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult =
      pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Preferred Bottom Node\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult =
      pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (BotResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node Cost\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  LLVM_DEBUG(dbgs() << "Preferred Bottom in Node order\n");
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
          pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
          pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
                    << " Scheduling instruction in cycle "
                    << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
                    << reportPackets() << ")\n";
             DAG->dumpNode(*SU));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    Top.bumpNode(SU);
    SU->TopReadyCycle = Top.CurrCycle;
  } else {
    Bot.bumpNode(SU);
    SU->BotReadyCycle = Bot.CurrCycle;
  }
}