1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // MachineScheduler schedules machine instructions after phi elimination. It 11 // preserves LiveIntervals so it can be invoked before register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "HexagonMachineScheduler.h" 16 #include "HexagonSubtarget.h" 17 #include "llvm/CodeGen/MachineLoopInfo.h" 18 #include "llvm/CodeGen/ScheduleDAGMutation.h" 19 #include "llvm/IR/Function.h" 20 21 #include <iomanip> 22 #include <sstream> 23 24 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", 25 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 26 27 static cl::opt<bool> SchedPredsCloser("sched-preds-closer", 28 cl::Hidden, cl::ZeroOrMore, cl::init(true)); 29 30 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", 31 cl::Hidden, cl::ZeroOrMore, cl::init(1)); 32 33 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie", 34 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 35 36 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie", 37 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 38 39 static cl::opt<bool> DisableTCTie("disable-tc-tie", 40 cl::Hidden, cl::ZeroOrMore, cl::init(false)); 41 42 static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization", 43 cl::Hidden, cl::ZeroOrMore, cl::init(true)); 44 45 // Check if the scheduler should penalize instructions that are available to 46 // early due to a zero-latency dependence. 47 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, 48 cl::ZeroOrMore, cl::init(true)); 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "misched" 53 54 namespace { 55 class HexagonCallMutation : public ScheduleDAGMutation { 56 public: 57 void apply(ScheduleDAGInstrs *DAG) override; 58 private: 59 bool shouldTFRICallBind(const HexagonInstrInfo &HII, 60 const SUnit &Inst1, const SUnit &Inst2) const; 61 }; 62 } // end anonymous namespace 63 64 // Check if a call and subsequent A2_tfrpi instructions should maintain 65 // scheduling affinity. We are looking for the TFRI to be consumed in 66 // the next instruction. This should help reduce the instances of 67 // double register pairs being allocated and scheduled before a call 68 // when not used until after the call. This situation is exacerbated 69 // by the fact that we allocate the pair from the callee saves list, 70 // leading to excess spills and restores. 71 bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, 72 const SUnit &Inst1, const SUnit &Inst2) const { 73 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) 74 return false; 75 76 // TypeXTYPE are 64 bit operations. 77 unsigned Type = HII.getType(*Inst2.getInstr()); 78 if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op || 79 Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM) 80 return true; 81 return false; 82 } 83 84 void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) { 85 SUnit* LastSequentialCall = nullptr; 86 unsigned VRegHoldingRet = 0; 87 unsigned RetRegister; 88 SUnit* LastUseOfRet = nullptr; 89 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); 90 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 91 92 // Currently we only catch the situation when compare gets scheduled 93 // before preceding call. 94 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { 95 // Remember the call. 96 if (DAG->SUnits[su].getInstr()->isCall()) 97 LastSequentialCall = &DAG->SUnits[su]; 98 // Look for a compare that defines a predicate. 99 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) 100 DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); 101 // Look for call and tfri* instructions. 102 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && 103 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) 104 DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); 105 // Prevent redundant register copies between two calls, which are caused by 106 // both the return value and the argument for the next call being in %R0. 107 // Example: 108 // 1: <call1> 109 // 2: %VregX = COPY %R0 110 // 3: <use of %VregX> 111 // 4: %R0 = ... 112 // 5: <call2> 113 // The scheduler would often swap 3 and 4, so an additional register is 114 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent 115 // this. The same applies for %D0 and %V0/%W0, which are also handled. 116 else if (SchedRetvalOptimization) { 117 const MachineInstr *MI = DAG->SUnits[su].getInstr(); 118 if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || 119 MI->readsRegister(Hexagon::V0, &TRI))) { 120 // %vregX = COPY %R0 121 VRegHoldingRet = MI->getOperand(0).getReg(); 122 RetRegister = MI->getOperand(1).getReg(); 123 LastUseOfRet = nullptr; 124 } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) 125 // <use of %vregX> 126 LastUseOfRet = &DAG->SUnits[su]; 127 else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) 128 // %R0 = ... 129 DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); 130 } 131 } 132 } 133 134 135 /// Save the last formed packet 136 void VLIWResourceModel::savePacket() { 137 OldPacket = Packet; 138 } 139 140 /// Check if scheduling of this SU is possible 141 /// in the current packet. 142 /// It is _not_ precise (statefull), it is more like 143 /// another heuristic. Many corner cases are figured 144 /// empirically. 145 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { 146 if (!SU || !SU->getInstr()) 147 return false; 148 149 // First see if the pipeline could receive this instruction 150 // in the current cycle. 151 switch (SU->getInstr()->getOpcode()) { 152 default: 153 if (!ResourcesModel->canReserveResources(*SU->getInstr())) 154 return false; 155 case TargetOpcode::EXTRACT_SUBREG: 156 case TargetOpcode::INSERT_SUBREG: 157 case TargetOpcode::SUBREG_TO_REG: 158 case TargetOpcode::REG_SEQUENCE: 159 case TargetOpcode::IMPLICIT_DEF: 160 case TargetOpcode::COPY: 161 case TargetOpcode::INLINEASM: 162 break; 163 } 164 165 MachineFunction &MF = *SU->getInstr()->getParent()->getParent(); 166 auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 167 168 // Now see if there are no other dependencies to instructions already 169 // in the packet. 170 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 171 if (Packet[i]->Succs.size() == 0) 172 continue; 173 174 // Enable .cur formation. 175 if (QII.mayBeCurLoad(*Packet[i]->getInstr())) 176 continue; 177 178 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), 179 E = Packet[i]->Succs.end(); I != E; ++I) { 180 // Since we do not add pseudos to packets, might as well 181 // ignore order dependencies. 182 if (I->isCtrl()) 183 continue; 184 185 if (I->getSUnit() == SU) 186 return false; 187 } 188 } 189 return true; 190 } 191 192 /// Keep track of available resources. 193 bool VLIWResourceModel::reserveResources(SUnit *SU) { 194 bool startNewCycle = false; 195 // Artificially reset state. 196 if (!SU) { 197 ResourcesModel->clearResources(); 198 savePacket(); 199 Packet.clear(); 200 TotalPackets++; 201 return false; 202 } 203 // If this SU does not fit in the packet 204 // start a new one. 205 if (!isResourceAvailable(SU)) { 206 ResourcesModel->clearResources(); 207 savePacket(); 208 Packet.clear(); 209 TotalPackets++; 210 startNewCycle = true; 211 } 212 213 switch (SU->getInstr()->getOpcode()) { 214 default: 215 ResourcesModel->reserveResources(*SU->getInstr()); 216 break; 217 case TargetOpcode::EXTRACT_SUBREG: 218 case TargetOpcode::INSERT_SUBREG: 219 case TargetOpcode::SUBREG_TO_REG: 220 case TargetOpcode::REG_SEQUENCE: 221 case TargetOpcode::IMPLICIT_DEF: 222 case TargetOpcode::KILL: 223 case TargetOpcode::CFI_INSTRUCTION: 224 case TargetOpcode::EH_LABEL: 225 case TargetOpcode::COPY: 226 case TargetOpcode::INLINEASM: 227 break; 228 } 229 Packet.push_back(SU); 230 231 #ifndef NDEBUG 232 DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); 233 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 234 DEBUG(dbgs() << "\t[" << i << "] SU("); 235 DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); 236 DEBUG(Packet[i]->getInstr()->dump()); 237 } 238 #endif 239 240 // If packet is now full, reset the state so in the next cycle 241 // we start fresh. 242 if (Packet.size() >= SchedModel->getIssueWidth()) { 243 ResourcesModel->clearResources(); 244 savePacket(); 245 Packet.clear(); 246 TotalPackets++; 247 startNewCycle = true; 248 } 249 250 return startNewCycle; 251 } 252 253 /// schedule - Called back from MachineScheduler::runOnMachineFunction 254 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 255 /// only includes instructions that have DAG nodes, not scheduling boundaries. 256 void VLIWMachineScheduler::schedule() { 257 DEBUG(dbgs() 258 << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() 259 << " " << BB->getName() 260 << " in_func " << BB->getParent()->getFunction()->getName() 261 << " at loop depth " << MLI->getLoopDepth(BB) 262 << " \n"); 263 264 buildDAGWithRegPressure(); 265 266 SmallVector<SUnit*, 8> TopRoots, BotRoots; 267 findRootsAndBiasEdges(TopRoots, BotRoots); 268 269 // Initialize the strategy before modifying the DAG. 270 SchedImpl->initialize(this); 271 272 DEBUG(unsigned maxH = 0; 273 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 274 if (SUnits[su].getHeight() > maxH) 275 maxH = SUnits[su].getHeight(); 276 dbgs() << "Max Height " << maxH << "\n";); 277 DEBUG(unsigned maxD = 0; 278 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 279 if (SUnits[su].getDepth() > maxD) 280 maxD = SUnits[su].getDepth(); 281 dbgs() << "Max Depth " << maxD << "\n";); 282 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 283 SUnits[su].dumpAll(this)); 284 285 initQueues(TopRoots, BotRoots); 286 287 bool IsTopNode = false; 288 while (true) { 289 DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); 290 SUnit *SU = SchedImpl->pickNode(IsTopNode); 291 if (!SU) break; 292 293 if (!checkSchedLimit()) 294 break; 295 296 scheduleMI(SU, IsTopNode); 297 298 updateQueues(SU, IsTopNode); 299 300 // Notify the scheduling strategy after updating the DAG. 301 SchedImpl->schedNode(SU, IsTopNode); 302 } 303 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 304 305 placeDebugValues(); 306 307 DEBUG({ 308 unsigned BBNum = begin()->getParent()->getNumber(); 309 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 310 dumpSchedule(); 311 dbgs() << '\n'; 312 }); 313 } 314 315 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { 316 DAG = static_cast<VLIWMachineScheduler*>(dag); 317 SchedModel = DAG->getSchedModel(); 318 319 Top.init(DAG, SchedModel); 320 Bot.init(DAG, SchedModel); 321 322 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 323 // are disabled, then these HazardRecs will be disabled. 324 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); 325 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); 326 const TargetInstrInfo *TII = STI.getInstrInfo(); 327 delete Top.HazardRec; 328 delete Bot.HazardRec; 329 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 330 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 331 332 delete Top.ResourceModel; 333 delete Bot.ResourceModel; 334 Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 335 Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); 336 337 assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && 338 "-misched-topdown incompatible with -misched-bottomup"); 339 340 DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>()); 341 DAG->addMutation(make_unique<HexagonCallMutation>()); 342 } 343 344 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { 345 if (SU->isScheduled) 346 return; 347 348 for (const SDep &PI : SU->Preds) { 349 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; 350 unsigned MinLatency = PI.getLatency(); 351 #ifndef NDEBUG 352 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 353 #endif 354 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 355 SU->TopReadyCycle = PredReadyCycle + MinLatency; 356 } 357 Top.releaseNode(SU, SU->TopReadyCycle); 358 } 359 360 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { 361 if (SU->isScheduled) 362 return; 363 364 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 365 366 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 367 I != E; ++I) { 368 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 369 unsigned MinLatency = I->getLatency(); 370 #ifndef NDEBUG 371 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 372 #endif 373 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 374 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 375 } 376 Bot.releaseNode(SU, SU->BotReadyCycle); 377 } 378 379 /// Does this SU have a hazard within the current instruction group. 380 /// 381 /// The scheduler supports two modes of hazard recognition. The first is the 382 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 383 /// supports highly complicated in-order reservation tables 384 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 385 /// 386 /// The second is a streamlined mechanism that checks for hazards based on 387 /// simple counters that the scheduler itself maintains. It explicitly checks 388 /// for instruction dispatch limitations, including the number of micro-ops that 389 /// can dispatch per cycle. 390 /// 391 /// TODO: Also check whether the SU must start a new group. 392 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { 393 if (HazardRec->isEnabled()) 394 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 395 396 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 397 if (IssueCount + uops > SchedModel->getIssueWidth()) 398 return true; 399 400 return false; 401 } 402 403 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, 404 unsigned ReadyCycle) { 405 if (ReadyCycle < MinReadyCycle) 406 MinReadyCycle = ReadyCycle; 407 408 // Check for interlocks first. For the purpose of other heuristics, an 409 // instruction that cannot issue appears as if it's not in the ReadyQueue. 410 if (ReadyCycle > CurrCycle || checkHazard(SU)) 411 412 Pending.push(SU); 413 else 414 Available.push(SU); 415 } 416 417 /// Move the boundary of scheduled code by one cycle. 418 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { 419 unsigned Width = SchedModel->getIssueWidth(); 420 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 421 422 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); 423 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); 424 425 if (!HazardRec->isEnabled()) { 426 // Bypass HazardRec virtual calls. 427 CurrCycle = NextCycle; 428 } else { 429 // Bypass getHazardType calls in case of long latency. 430 for (; CurrCycle != NextCycle; ++CurrCycle) { 431 if (isTop()) 432 HazardRec->AdvanceCycle(); 433 else 434 HazardRec->RecedeCycle(); 435 } 436 } 437 CheckPending = true; 438 439 DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " 440 << CurrCycle << '\n'); 441 } 442 443 /// Move the boundary of scheduled code by one SUnit. 444 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { 445 bool startNewCycle = false; 446 447 // Update the reservation table. 448 if (HazardRec->isEnabled()) { 449 if (!isTop() && SU->isCall) { 450 // Calls are scheduled with their preceding instructions. For bottom-up 451 // scheduling, clear the pipeline state before emitting. 452 HazardRec->Reset(); 453 } 454 HazardRec->EmitInstruction(SU); 455 } 456 457 // Update DFA model. 458 startNewCycle = ResourceModel->reserveResources(SU); 459 460 // Check the instruction group dispatch limit. 461 // TODO: Check if this SU must end a dispatch group. 462 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 463 if (startNewCycle) { 464 DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); 465 bumpCycle(); 466 } 467 else 468 DEBUG(dbgs() << "*** IssueCount " << IssueCount 469 << " at cycle " << CurrCycle << '\n'); 470 } 471 472 /// Release pending ready nodes in to the available queue. This makes them 473 /// visible to heuristics. 474 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { 475 // If the available queue is empty, it is safe to reset MinReadyCycle. 476 if (Available.empty()) 477 MinReadyCycle = UINT_MAX; 478 479 // Check to see if any of the pending instructions are ready to issue. If 480 // so, add them to the available queue. 481 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 482 SUnit *SU = *(Pending.begin()+i); 483 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 484 485 if (ReadyCycle < MinReadyCycle) 486 MinReadyCycle = ReadyCycle; 487 488 if (ReadyCycle > CurrCycle) 489 continue; 490 491 if (checkHazard(SU)) 492 continue; 493 494 Available.push(SU); 495 Pending.remove(Pending.begin()+i); 496 --i; --e; 497 } 498 CheckPending = false; 499 } 500 501 /// Remove SU from the ready set for this boundary. 502 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { 503 if (Available.isInQueue(SU)) 504 Available.remove(Available.find(SU)); 505 else { 506 assert(Pending.isInQueue(SU) && "bad ready count"); 507 Pending.remove(Pending.find(SU)); 508 } 509 } 510 511 /// If this queue only has one ready candidate, return it. As a side effect, 512 /// advance the cycle until at least one node is ready. If multiple instructions 513 /// are ready, return NULL. 514 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { 515 if (CheckPending) 516 releasePending(); 517 518 for (unsigned i = 0; Available.empty(); ++i) { 519 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 520 "permanent hazard"); (void)i; 521 ResourceModel->reserveResources(nullptr); 522 bumpCycle(); 523 releasePending(); 524 } 525 if (Available.size() == 1) 526 return *Available.begin(); 527 return nullptr; 528 } 529 530 #ifndef NDEBUG 531 void ConvergingVLIWScheduler::traceCandidate(const char *Label, 532 const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { 533 dbgs() << Label << " " << Q.getName() << " "; 534 if (P.isValid()) 535 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" 536 << P.getUnitInc() << " "; 537 else 538 dbgs() << " "; 539 dbgs() << "cost(" << Cost << ")\t"; 540 SU->dump(DAG); 541 } 542 543 // Very detailed queue dump, to be used with higher verbosity levels. 544 void ConvergingVLIWScheduler::readyQueueVerboseDump( 545 const RegPressureTracker &RPTracker, SchedCandidate &Candidate, 546 ReadyQueue &Q) { 547 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 548 549 dbgs() << ">>> " << Q.getName() << "\n"; 550 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 551 RegPressureDelta RPDelta; 552 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 553 DAG->getRegionCriticalPSets(), 554 DAG->getRegPressure().MaxSetPressure); 555 std::stringstream dbgstr; 556 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; 557 dbgs() << dbgstr.str(); 558 SchedulingCost(Q, *I, Candidate, RPDelta, true); 559 dbgs() << "\t"; 560 (*I)->getInstr()->dump(); 561 } 562 dbgs() << "\n"; 563 } 564 #endif 565 566 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor 567 /// of SU, return true (we may have duplicates) 568 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { 569 if (SU->NumPredsLeft == 0) 570 return false; 571 572 for (auto &Pred : SU->Preds) { 573 // We found an available, but not scheduled, predecessor. 574 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) 575 return false; 576 } 577 578 return true; 579 } 580 581 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor 582 /// of SU, return true (we may have duplicates) 583 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { 584 if (SU->NumSuccsLeft == 0) 585 return false; 586 587 for (auto &Succ : SU->Succs) { 588 // We found an available, but not scheduled, successor. 589 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) 590 return false; 591 } 592 return true; 593 } 594 595 // Constants used to denote relative importance of 596 // heuristic components for cost computation. 597 static const unsigned PriorityOne = 200; 598 static const unsigned PriorityTwo = 50; 599 static const unsigned PriorityThree = 75; 600 static const unsigned ScaleTwo = 10; 601 static const unsigned FactorOne = 2; 602 603 /// Single point to compute overall scheduling cost. 604 /// TODO: More heuristics will be used soon. 605 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, 606 SchedCandidate &Candidate, 607 RegPressureDelta &Delta, 608 bool verbose) { 609 // Initial trivial priority. 610 int ResCount = 1; 611 612 // Do not waste time on a node that is already scheduled. 613 if (!SU || SU->isScheduled) 614 return ResCount; 615 616 MachineInstr &Instr = *SU->getInstr(); 617 618 DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); 619 // Forced priority is high. 620 if (SU->isScheduleHigh) { 621 ResCount += PriorityOne; 622 DEBUG(dbgs() << "H|"); 623 } 624 625 // Critical path first. 626 if (Q.getID() == TopQID) { 627 ResCount += (SU->getHeight() * ScaleTwo); 628 629 DEBUG(if (verbose) { 630 std::stringstream dbgstr; 631 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; 632 dbgs() << dbgstr.str(); 633 }); 634 635 // If resources are available for it, multiply the 636 // chance of scheduling. 637 if (Top.ResourceModel->isResourceAvailable(SU)) { 638 ResCount <<= FactorOne; 639 ResCount += PriorityThree; 640 DEBUG(if (verbose) dbgs() << "A|"); 641 } else 642 DEBUG(if (verbose) dbgs() << " |"); 643 } else { 644 ResCount += (SU->getDepth() * ScaleTwo); 645 646 DEBUG(if (verbose) { 647 std::stringstream dbgstr; 648 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; 649 dbgs() << dbgstr.str(); 650 }); 651 652 // If resources are available for it, multiply the 653 // chance of scheduling. 654 if (Bot.ResourceModel->isResourceAvailable(SU)) { 655 ResCount <<= FactorOne; 656 ResCount += PriorityThree; 657 DEBUG(if (verbose) dbgs() << "A|"); 658 } else 659 DEBUG(if (verbose) dbgs() << " |"); 660 } 661 662 unsigned NumNodesBlocking = 0; 663 if (Q.getID() == TopQID) { 664 // How many SUs does it block from scheduling? 665 // Look at all of the successors of this node. 666 // Count the number of nodes that 667 // this node is the sole unscheduled node for. 668 for (const SDep &SI : SU->Succs) 669 if (isSingleUnscheduledPred(SI.getSUnit(), SU)) 670 ++NumNodesBlocking; 671 } else { 672 // How many unscheduled predecessors block this node? 673 for (const SDep &PI : SU->Preds) 674 if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) 675 ++NumNodesBlocking; 676 } 677 ResCount += (NumNodesBlocking * ScaleTwo); 678 679 DEBUG(if (verbose) { 680 std::stringstream dbgstr; 681 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; 682 dbgs() << dbgstr.str(); 683 }); 684 685 // Factor in reg pressure as a heuristic. 686 if (!IgnoreBBRegPressure) { 687 // Decrease priority by the amount that register pressure exceeds the limit. 688 ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); 689 // Decrease priority if register pressure exceeds the limit. 690 ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); 691 // Decrease priority slightly if register pressure would increase over the 692 // current maximum. 693 ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); 694 DEBUG(if (verbose) { 695 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" 696 << Delta.CriticalMax.getUnitInc() <<"/" 697 << Delta.CurrentMax.getUnitInc() << ")|"; 698 }); 699 } 700 701 // Give a little extra priority to a .cur instruction if there is a resource 702 // available for it. 703 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); 704 auto &QII = *QST.getInstrInfo(); 705 if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { 706 if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) { 707 ResCount += PriorityTwo; 708 DEBUG(if (verbose) dbgs() << "C|"); 709 } else if (Q.getID() == BotQID && 710 Bot.ResourceModel->isResourceAvailable(SU)) { 711 ResCount += PriorityTwo; 712 DEBUG(if (verbose) dbgs() << "C|"); 713 } 714 } 715 716 // Give preference to a zero latency instruction if the dependent 717 // instruction is in the current packet. 718 if (Q.getID() == TopQID) { 719 for (const SDep &PI : SU->Preds) { 720 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && 721 PI.getLatency() == 0 && 722 Top.ResourceModel->isInPacket(PI.getSUnit())) { 723 ResCount += PriorityThree; 724 DEBUG(if (verbose) dbgs() << "Z|"); 725 } 726 } 727 } else { 728 for (const SDep &SI : SU->Succs) { 729 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && 730 SI.getLatency() == 0 && 731 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 732 ResCount += PriorityThree; 733 DEBUG(if (verbose) dbgs() << "Z|"); 734 } 735 } 736 } 737 738 // Give less preference to an instruction that will cause a stall with 739 // an instruction in the previous packet. 740 if (QII.isHVXVec(Instr)) { 741 // Check for stalls in the previous packet. 742 if (Q.getID() == TopQID) { 743 for (auto J : Top.ResourceModel->OldPacket) 744 if (QII.producesStall(*J->getInstr(), Instr)) 745 ResCount -= PriorityOne; 746 } else { 747 for (auto J : Bot.ResourceModel->OldPacket) 748 if (QII.producesStall(Instr, *J->getInstr())) 749 ResCount -= PriorityOne; 750 } 751 } 752 753 // If the instruction has a non-zero latency dependence with an instruction in 754 // the current packet, then it should not be scheduled yet. The case occurs 755 // when the dependent instruction is scheduled in a new packet, so the 756 // scheduler updates the current cycle and pending instructions become 757 // available. 758 if (CheckEarlyAvail) { 759 if (Q.getID() == TopQID) { 760 for (const auto &PI : SU->Preds) { 761 if (PI.getLatency() > 0 && 762 Top.ResourceModel->isInPacket(PI.getSUnit())) { 763 ResCount -= PriorityOne; 764 DEBUG(if (verbose) dbgs() << "D|"); 765 } 766 } 767 } else { 768 for (const auto &SI : SU->Succs) { 769 if (SI.getLatency() > 0 && 770 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 771 ResCount -= PriorityOne; 772 DEBUG(if (verbose) dbgs() << "D|"); 773 } 774 } 775 } 776 } 777 778 DEBUG(if (verbose) { 779 std::stringstream dbgstr; 780 dbgstr << "Total " << std::setw(4) << ResCount << ")"; 781 dbgs() << dbgstr.str(); 782 }); 783 784 return ResCount; 785 } 786 787 /// Pick the best candidate from the top queue. 788 /// 789 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 790 /// DAG building. To adjust for the current scheduling location we need to 791 /// maintain the number of vreg uses remaining to be top-scheduled. 792 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: 793 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, 794 SchedCandidate &Candidate) { 795 DEBUG(if (SchedDebugVerboseLevel > 1) 796 readyQueueVerboseDump(RPTracker, Candidate, Q); 797 else Q.dump();); 798 799 // getMaxPressureDelta temporarily modifies the tracker. 800 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 801 802 // BestSU remains NULL if no top candidates beat the best existing candidate. 803 CandResult FoundCandidate = NoCand; 804 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 805 RegPressureDelta RPDelta; 806 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 807 DAG->getRegionCriticalPSets(), 808 DAG->getRegPressure().MaxSetPressure); 809 810 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); 811 812 // Initialize the candidate if needed. 813 if (!Candidate.SU) { 814 DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); 815 Candidate.SU = *I; 816 Candidate.RPDelta = RPDelta; 817 Candidate.SCost = CurrentCost; 818 FoundCandidate = NodeOrder; 819 continue; 820 } 821 822 // Best cost. 823 if (CurrentCost > Candidate.SCost) { 824 DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); 825 Candidate.SU = *I; 826 Candidate.RPDelta = RPDelta; 827 Candidate.SCost = CurrentCost; 828 FoundCandidate = BestCost; 829 continue; 830 } 831 832 // Tie breaker using Timing Class. 833 if (!DisableTCTie) { 834 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); 835 auto &QII = *QST.getInstrInfo(); 836 837 const MachineInstr *MI = (*I)->getInstr(); 838 const MachineInstr *CandI = Candidate.SU->getInstr(); 839 const InstrItineraryData *InstrItins = QST.getInstrItineraryData(); 840 841 unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI); 842 unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI); 843 DEBUG(dbgs() << "TC Tie Breaker Cand: " 844 << CandLatency << " Instr:" << InstrLatency << "\n" 845 << *MI << *CandI << "\n"); 846 if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) { 847 if (InstrLatency < CandLatency && TopUseShorterTie) { 848 Candidate.SU = *I; 849 Candidate.RPDelta = RPDelta; 850 Candidate.SCost = CurrentCost; 851 FoundCandidate = BestCost; 852 DEBUG(dbgs() << "Used top shorter tie breaker\n"); 853 continue; 854 } else if (InstrLatency > CandLatency && !TopUseShorterTie) { 855 Candidate.SU = *I; 856 Candidate.RPDelta = RPDelta; 857 Candidate.SCost = CurrentCost; 858 FoundCandidate = BestCost; 859 DEBUG(dbgs() << "Used top longer tie breaker\n"); 860 continue; 861 } 862 } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) { 863 if (InstrLatency < CandLatency && BotUseShorterTie) { 864 Candidate.SU = *I; 865 Candidate.RPDelta = RPDelta; 866 Candidate.SCost = CurrentCost; 867 FoundCandidate = BestCost; 868 DEBUG(dbgs() << "Used Bot shorter tie breaker\n"); 869 continue; 870 } else if (InstrLatency > CandLatency && !BotUseShorterTie) { 871 Candidate.SU = *I; 872 Candidate.RPDelta = RPDelta; 873 Candidate.SCost = CurrentCost; 874 FoundCandidate = BestCost; 875 DEBUG(dbgs() << "Used Bot longer tie breaker\n"); 876 continue; 877 } 878 } 879 } 880 881 if (CurrentCost == Candidate.SCost) { 882 if ((Q.getID() == TopQID && 883 (*I)->Succs.size() > Candidate.SU->Succs.size()) || 884 (Q.getID() == BotQID && 885 (*I)->Preds.size() < Candidate.SU->Preds.size())) { 886 DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); 887 Candidate.SU = *I; 888 Candidate.RPDelta = RPDelta; 889 Candidate.SCost = CurrentCost; 890 FoundCandidate = BestCost; 891 continue; 892 } 893 } 894 895 // Fall through to original instruction order. 896 // Only consider node order if Candidate was chosen from this Q. 897 if (FoundCandidate == NoCand) 898 continue; 899 } 900 return FoundCandidate; 901 } 902 903 /// Pick the best candidate node from either the top or bottom queue. 904 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { 905 // Schedule as far as possible in the direction of no choice. This is most 906 // efficient, but also provides the best heuristics for CriticalPSets. 907 if (SUnit *SU = Bot.pickOnlyChoice()) { 908 DEBUG(dbgs() << "Picked only Bottom\n"); 909 IsTopNode = false; 910 return SU; 911 } 912 if (SUnit *SU = Top.pickOnlyChoice()) { 913 DEBUG(dbgs() << "Picked only Top\n"); 914 IsTopNode = true; 915 return SU; 916 } 917 SchedCandidate BotCand; 918 // Prefer bottom scheduling when heuristics are silent. 919 CandResult BotResult = pickNodeFromQueue(Bot.Available, 920 DAG->getBotRPTracker(), BotCand); 921 assert(BotResult != NoCand && "failed to find the first candidate"); 922 923 // If either Q has a single candidate that provides the least increase in 924 // Excess pressure, we can immediately schedule from that Q. 925 // 926 // RegionCriticalPSets summarizes the pressure within the scheduled region and 927 // affects picking from either Q. If scheduling in one direction must 928 // increase pressure for one of the excess PSets, then schedule in that 929 // direction first to provide more freedom in the other direction. 930 if (BotResult == SingleExcess || BotResult == SingleCritical) { 931 DEBUG(dbgs() << "Prefered Bottom Node\n"); 932 IsTopNode = false; 933 return BotCand.SU; 934 } 935 // Check if the top Q has a better candidate. 936 SchedCandidate TopCand; 937 CandResult TopResult = pickNodeFromQueue(Top.Available, 938 DAG->getTopRPTracker(), TopCand); 939 assert(TopResult != NoCand && "failed to find the first candidate"); 940 941 if (TopResult == SingleExcess || TopResult == SingleCritical) { 942 DEBUG(dbgs() << "Prefered Top Node\n"); 943 IsTopNode = true; 944 return TopCand.SU; 945 } 946 // If either Q has a single candidate that minimizes pressure above the 947 // original region's pressure pick it. 948 if (BotResult == SingleMax) { 949 DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); 950 IsTopNode = false; 951 return BotCand.SU; 952 } 953 if (TopResult == SingleMax) { 954 DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); 955 IsTopNode = true; 956 return TopCand.SU; 957 } 958 if (TopCand.SCost > BotCand.SCost) { 959 DEBUG(dbgs() << "Prefered Top Node Cost\n"); 960 IsTopNode = true; 961 return TopCand.SU; 962 } 963 // Otherwise prefer the bottom candidate in node order. 964 DEBUG(dbgs() << "Prefered Bottom in Node order\n"); 965 IsTopNode = false; 966 return BotCand.SU; 967 } 968 969 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 970 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { 971 if (DAG->top() == DAG->bottom()) { 972 assert(Top.Available.empty() && Top.Pending.empty() && 973 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 974 return nullptr; 975 } 976 SUnit *SU; 977 if (llvm::ForceTopDown) { 978 SU = Top.pickOnlyChoice(); 979 if (!SU) { 980 SchedCandidate TopCand; 981 CandResult TopResult = 982 pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); 983 assert(TopResult != NoCand && "failed to find the first candidate"); 984 (void)TopResult; 985 SU = TopCand.SU; 986 } 987 IsTopNode = true; 988 } else if (llvm::ForceBottomUp) { 989 SU = Bot.pickOnlyChoice(); 990 if (!SU) { 991 SchedCandidate BotCand; 992 CandResult BotResult = 993 pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); 994 assert(BotResult != NoCand && "failed to find the first candidate"); 995 (void)BotResult; 996 SU = BotCand.SU; 997 } 998 IsTopNode = false; 999 } else { 1000 SU = pickNodeBidrectional(IsTopNode); 1001 } 1002 if (SU->isTopReady()) 1003 Top.removeReady(SU); 1004 if (SU->isBottomReady()) 1005 Bot.removeReady(SU); 1006 1007 DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") 1008 << " Scheduling Instruction in cycle " 1009 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; 1010 SU->dump(DAG)); 1011 return SU; 1012 } 1013 1014 /// Update the scheduler's state after scheduling a node. This is the same node 1015 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs 1016 /// to update it's state based on the current cycle before MachineSchedStrategy 1017 /// does. 1018 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { 1019 if (IsTopNode) { 1020 SU->TopReadyCycle = Top.CurrCycle; 1021 Top.bumpNode(SU); 1022 } else { 1023 SU->BotReadyCycle = Bot.CurrCycle; 1024 Bot.bumpNode(SU); 1025 } 1026 } 1027