1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "HexagonMachineScheduler.h"
15 #include "HexagonInstrInfo.h"
16 #include "HexagonSubtarget.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/CodeGen/DFAPacketizer.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineLoopInfo.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/RegisterPressure.h"
25 #include "llvm/CodeGen/ScheduleDAG.h"
26 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/CodeGen/TargetOpcodes.h"
29 #include "llvm/CodeGen/TargetRegisterInfo.h"
30 #include "llvm/CodeGen/TargetSchedule.h"
31 #include "llvm/CodeGen/TargetSubtargetInfo.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <iomanip>
39 #include <limits>
40 #include <memory>
41 #include <sstream>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "machine-scheduler"
46 
47 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
48     cl::Hidden, cl::ZeroOrMore, cl::init(false));
49 
50 static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
51     cl::Hidden, cl::ZeroOrMore, cl::init(true));
52 
53 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
54     cl::Hidden, cl::ZeroOrMore, cl::init(1));
55 
56 // Check if the scheduler should penalize instructions that are available to
57 // early due to a zero-latency dependence.
58 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
59     cl::ZeroOrMore, cl::init(true));
60 
61 // This value is used to determine if a register class is a high pressure set.
62 // We compute the maximum number of registers needed and divided by the total
63 // available. Then, we compare the result to this value.
64 static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden,
65     cl::init(0.75f), cl::desc("High register pressure threhold."));
66 
67 /// Return true if there is a dependence between SUd and SUu.
68 static bool hasDependence(const SUnit *SUd, const SUnit *SUu,
69                           const HexagonInstrInfo &QII) {
70   if (SUd->Succs.size() == 0)
71     return false;
72 
73   // Enable .cur formation.
74   if (QII.mayBeCurLoad(*SUd->getInstr()))
75     return false;
76 
77   if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
78     return false;
79 
80   for (const auto &S : SUd->Succs) {
81     // Since we do not add pseudos to packets, might as well
82     // ignore order dependencies.
83     if (S.isCtrl())
84       continue;
85 
86     if (S.getSUnit() == SUu && S.getLatency() > 0)
87       return true;
88   }
89   return false;
90 }
91 
92 /// Check if scheduling of this SU is possible
93 /// in the current packet.
94 /// It is _not_ precise (statefull), it is more like
95 /// another heuristic. Many corner cases are figured
96 /// empirically.
97 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
98   if (!SU || !SU->getInstr())
99     return false;
100 
101   // First see if the pipeline could receive this instruction
102   // in the current cycle.
103   switch (SU->getInstr()->getOpcode()) {
104   default:
105     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
106       return false;
107     break;
108   case TargetOpcode::EXTRACT_SUBREG:
109   case TargetOpcode::INSERT_SUBREG:
110   case TargetOpcode::SUBREG_TO_REG:
111   case TargetOpcode::REG_SEQUENCE:
112   case TargetOpcode::IMPLICIT_DEF:
113   case TargetOpcode::COPY:
114   case TargetOpcode::INLINEASM:
115   case TargetOpcode::INLINEASM_BR:
116     break;
117   }
118 
119   MachineBasicBlock *MBB = SU->getInstr()->getParent();
120   auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>();
121   const auto &QII = *QST.getInstrInfo();
122 
123   // Now see if there are no other dependencies to instructions already
124   // in the packet.
125   if (IsTop) {
126     for (unsigned i = 0, e = Packet.size(); i != e; ++i)
127       if (hasDependence(Packet[i], SU, QII))
128         return false;
129   } else {
130     for (unsigned i = 0, e = Packet.size(); i != e; ++i)
131       if (hasDependence(SU, Packet[i], QII))
132         return false;
133   }
134   return true;
135 }
136 
137 /// Keep track of available resources.
138 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
139   bool startNewCycle = false;
140   // Artificially reset state.
141   if (!SU) {
142     ResourcesModel->clearResources();
143     Packet.clear();
144     TotalPackets++;
145     return false;
146   }
147   // If this SU does not fit in the packet or the packet is now full
148   // start a new one.
149   if (!isResourceAvailable(SU, IsTop) ||
150       Packet.size() >= SchedModel->getIssueWidth()) {
151     ResourcesModel->clearResources();
152     Packet.clear();
153     TotalPackets++;
154     startNewCycle = true;
155   }
156 
157   switch (SU->getInstr()->getOpcode()) {
158   default:
159     ResourcesModel->reserveResources(*SU->getInstr());
160     break;
161   case TargetOpcode::EXTRACT_SUBREG:
162   case TargetOpcode::INSERT_SUBREG:
163   case TargetOpcode::SUBREG_TO_REG:
164   case TargetOpcode::REG_SEQUENCE:
165   case TargetOpcode::IMPLICIT_DEF:
166   case TargetOpcode::KILL:
167   case TargetOpcode::CFI_INSTRUCTION:
168   case TargetOpcode::EH_LABEL:
169   case TargetOpcode::COPY:
170   case TargetOpcode::INLINEASM:
171   case TargetOpcode::INLINEASM_BR:
172     break;
173   }
174   Packet.push_back(SU);
175 
176 #ifndef NDEBUG
177   LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
178   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
179     LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
180     LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
181     LLVM_DEBUG(Packet[i]->getInstr()->dump());
182   }
183 #endif
184 
185   return startNewCycle;
186 }
187 
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the scheduling DAG for the region and compute register pressure.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Debug-only: report the DAG's maximum height and depth (critical path
  // lengths from the top and bottom respectively).
  LLVM_DEBUG(unsigned maxH = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getHeight() > maxH) maxH =
                 SUnits[su].getHeight();
             dbgs() << "Max Height " << maxH << "\n";);
  LLVM_DEBUG(unsigned maxD = 0;
             for (unsigned su = 0, e = SUnits.size(); su != e;
                  ++su) if (SUnits[su].getDepth() > maxD) maxD =
                 SUnits[su].getDepth();
             dbgs() << "Max Depth " << maxD << "\n";);
  LLVM_DEBUG(dump());

  initQueues(TopRoots, BotRoots);

  // Main scheduling loop: repeatedly ask the strategy for the next node
  // (which also decides the direction), place it, then update queues.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
252 
253 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
254   DAG = static_cast<VLIWMachineScheduler*>(dag);
255   SchedModel = DAG->getSchedModel();
256 
257   Top.init(DAG, SchedModel);
258   Bot.init(DAG, SchedModel);
259 
260   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
261   // are disabled, then these HazardRecs will be disabled.
262   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
263   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
264   const TargetInstrInfo *TII = STI.getInstrInfo();
265   delete Top.HazardRec;
266   delete Bot.HazardRec;
267   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
268   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
269 
270   delete Top.ResourceModel;
271   delete Bot.ResourceModel;
272   Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
273   Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
274 
275   const std::vector<unsigned> &MaxPressure =
276     DAG->getRegPressure().MaxSetPressure;
277   HighPressureSets.assign(MaxPressure.size(), 0);
278   for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
279     unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
280     HighPressureSets[i] =
281       ((float) MaxPressure[i] > ((float) Limit * RPThreshold));
282   }
283 
284   assert((!ForceTopDown || !ForceBottomUp) &&
285          "-misched-topdown incompatible with -misched-bottomup");
286 }
287 
288 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
289   for (const SDep &PI : SU->Preds) {
290     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
291     unsigned MinLatency = PI.getLatency();
292 #ifndef NDEBUG
293     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
294 #endif
295     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
296       SU->TopReadyCycle = PredReadyCycle + MinLatency;
297   }
298 
299   if (!SU->isScheduled)
300     Top.releaseNode(SU, SU->TopReadyCycle);
301 }
302 
303 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
304   assert(SU->getInstr() && "Scheduled SUnit must have instr");
305 
306   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
307        I != E; ++I) {
308     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
309     unsigned MinLatency = I->getLatency();
310 #ifndef NDEBUG
311     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
312 #endif
313     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
314       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
315   }
316 
317   if (!SU->isScheduled)
318     Bot.releaseNode(SU, SU->BotReadyCycle);
319 }
320 
321 /// Does this SU have a hazard within the current instruction group.
322 ///
323 /// The scheduler supports two modes of hazard recognition. The first is the
324 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
325 /// supports highly complicated in-order reservation tables
326 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
327 ///
328 /// The second is a streamlined mechanism that checks for hazards based on
329 /// simple counters that the scheduler itself maintains. It explicitly checks
330 /// for instruction dispatch limitations, including the number of micro-ops that
331 /// can dispatch per cycle.
332 ///
333 /// TODO: Also check whether the SU must start a new group.
334 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
335   if (HazardRec->isEnabled())
336     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
337 
338   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
339   if (IssueCount + uops > SchedModel->getIssueWidth())
340     return true;
341 
342   return false;
343 }
344 
345 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
346                                                      unsigned ReadyCycle) {
347   if (ReadyCycle < MinReadyCycle)
348     MinReadyCycle = ReadyCycle;
349 
350   // Check for interlocks first. For the purpose of other heuristics, an
351   // instruction that cannot issue appears as if it's not in the ReadyQueue.
352   if (ReadyCycle > CurrCycle || checkHazard(SU))
353 
354     Pending.push(SU);
355   else
356     Available.push(SU);
357 }
358 
359 /// Move the boundary of scheduled code by one cycle.
360 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
361   unsigned Width = SchedModel->getIssueWidth();
362   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
363 
364   assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
365          "MinReadyCycle uninitialized");
366   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
367 
368   if (!HazardRec->isEnabled()) {
369     // Bypass HazardRec virtual calls.
370     CurrCycle = NextCycle;
371   } else {
372     // Bypass getHazardType calls in case of long latency.
373     for (; CurrCycle != NextCycle; ++CurrCycle) {
374       if (isTop())
375         HazardRec->AdvanceCycle();
376       else
377         HazardRec->RecedeCycle();
378     }
379   }
380   CheckPending = true;
381 
382   LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
383                     << CurrCycle << '\n');
384 }
385 
386 /// Move the boundary of scheduled code by one SUnit.
387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
388   bool startNewCycle = false;
389 
390   // Update the reservation table.
391   if (HazardRec->isEnabled()) {
392     if (!isTop() && SU->isCall) {
393       // Calls are scheduled with their preceding instructions. For bottom-up
394       // scheduling, clear the pipeline state before emitting.
395       HazardRec->Reset();
396     }
397     HazardRec->EmitInstruction(SU);
398   }
399 
400   // Update DFA model.
401   startNewCycle = ResourceModel->reserveResources(SU, isTop());
402 
403   // Check the instruction group dispatch limit.
404   // TODO: Check if this SU must end a dispatch group.
405   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
406   if (startNewCycle) {
407     LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
408     bumpCycle();
409   }
410   else
411     LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
412                       << CurrCycle << '\n');
413 }
414 
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest cycle at which a still-pending node becomes ready.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Not ready in the current cycle yet; leave it pending.
    if (ReadyCycle > CurrCycle)
      continue;

    // A hazard in the current group also keeps it pending.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    // NOTE(review): remove() appears to fill slot i with another element
    // (presumably the queue's last entry -- confirm in ReadyQueue), so step
    // back to re-examine slot i and shrink the loop bound accordingly.
    --i; --e;
  }
  CheckPending = false;
}
443 
444 /// Remove SU from the ready set for this boundary.
445 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
446   if (Available.isInQueue(SU))
447     Available.remove(Available.find(SU));
448   else {
449     assert(Pending.isInQueue(SU) && "bad ready count");
450     Pending.remove(Pending.find(SU));
451   }
452 }
453 
454 /// If this queue only has one ready candidate, return it. As a side effect,
455 /// advance the cycle until at least one node is ready. If multiple instructions
456 /// are ready, return NULL.
457 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
458   if (CheckPending)
459     releasePending();
460 
461   auto AdvanceCycle = [this]() {
462     if (Available.empty())
463       return true;
464     if (Available.size() == 1 && Pending.size() > 0)
465       return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
466         getWeakLeft(*Available.begin(), isTop()) != 0;
467     return false;
468   };
469   for (unsigned i = 0; AdvanceCycle(); ++i) {
470     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
471            "permanent hazard"); (void)i;
472     ResourceModel->reserveResources(nullptr, isTop());
473     bumpCycle();
474     releasePending();
475   }
476   if (Available.size() == 1)
477     return *Available.begin();
478   return nullptr;
479 }
480 
#ifndef NDEBUG
// Print a single candidate trace line: label, queue name, the pressure
// change (if valid), the computed cost, and the node itself.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
      const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << "     ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}

// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
      const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
      ReadyQueue &Q) {
  // getMaxPressureDelta temporarily modifies the tracker, hence const_cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    // std::stringstream + setw gives fixed-width node numbers in the dump.
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    // Calling SchedulingCost with verbose=true prints the cost breakdown.
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
#endif
516 
517 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
518 /// of SU, return true (we may have duplicates)
519 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
520   if (SU->NumPredsLeft == 0)
521     return false;
522 
523   for (auto &Pred : SU->Preds) {
524     // We found an available, but not scheduled, predecessor.
525     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
526       return false;
527   }
528 
529   return true;
530 }
531 
532 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
533 /// of SU, return true (we may have duplicates)
534 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
535   if (SU->NumSuccsLeft == 0)
536     return false;
537 
538   for (auto &Succ : SU->Succs) {
539     // We found an available, but not scheduled, successor.
540     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
541       return false;
542   }
543   return true;
544 }
545 
/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value is the pressure increases. If the instruction
/// doesn't use a high pressure register or doesn't change the register
/// pressure, then return 0.
///
/// Note: only the first valid high-pressure entry in the PressureDiff is
/// considered; any remaining entries are ignored.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  PressureDiff &PD = DAG->getPressureDiff(SU);
  for (auto &P : PD) {
    if (!P.isValid())
      continue;
    // The pressure differences are computed bottom-up, so the comparison for
    // an increase is positive in the bottom direction, but negative in the
    // top-down direction.
    if (HighPressureSets[P.getPSet()])
      return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  }
  return 0;
}
564 
// Constants used to denote relative importance of
// heuristic components for cost computation.
// Dominant weight: used for forced-high priority, register-pressure excess
// penalties, and the early-availability penalty in SchedulingCost.
static const unsigned PriorityOne = 200;
// Small bonus, e.g. for .cur loads with resources available, and part of the
// resource-availability bonus.
static const unsigned PriorityTwo = 50;
// Medium bonus, e.g. for zero-latency producers already in the packet.
static const unsigned PriorityThree = 75;
// Multiplier for critical-path height/depth and blocked-node counts.
static const unsigned ScaleTwo = 10;
571 
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// Combines, into one signed score: forced priority, critical-path
/// height/depth, resource availability in the current packet, the number of
/// nodes this one unblocks, register pressure, .cur load formation, and
/// zero/non-zero-latency interactions with the current packet. Higher is
/// better. When \p verbose is set, a per-component trace is printed (used by
/// readyQueueVerboseDump).
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  // Remember the availability bonus so it can be retracted below if the node
  // would worsen register pressure.
  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    // Top-down: height (distance to the bottom) approximates the critical
    // path through this node.
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    // Bottom-up: depth (distance from the top) plays the symmetric role.
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give a little extra priority to a .cur instruction if there is a resource
  // available for it.
  auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
  auto &QII = *QST.getInstrInfo();
  if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
    if (Q.getID() == TopQID &&
        Top.ResourceModel->isResourceAvailable(SU, true)) {
      ResCount += PriorityTwo;
      LLVM_DEBUG(if (verbose) dbgs() << "C|");
    } else if (Q.getID() == BotQID &&
               Bot.ResourceModel->isResourceAvailable(SU, false)) {
      ResCount += PriorityTwo;
      LLVM_DEBUG(if (verbose) dbgs() << "C|");
    }
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
756 
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
///
/// Walks every node in the zone's Available queue, scores it with
/// SchedulingCost, and keeps the best in \p Candidate, applying a cascade of
/// tie breakers (weak-edge count, successor/predecessor fanout, node order).
/// Returns which rule selected the final candidate, or NoCand if nothing
/// replaced the incoming one.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker,
                  SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
          || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    // Fewer outstanding weak (artificial) edges wins.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    // On a cost tie while latency-bound, prefer the node with more edges in
    // the scheduling direction (larger fanout unblocks more work).
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
          || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    // (Intentionally a no-op at the end of the loop body.)
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
875 
876 /// Pick the best candidate node from either the top or bottom queue.
877 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
878   // Schedule as far as possible in the direction of no choice. This is most
879   // efficient, but also provides the best heuristics for CriticalPSets.
880   if (SUnit *SU = Bot.pickOnlyChoice()) {
881     LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
882     IsTopNode = false;
883     return SU;
884   }
885   if (SUnit *SU = Top.pickOnlyChoice()) {
886     LLVM_DEBUG(dbgs() << "Picked only Top\n");
887     IsTopNode = true;
888     return SU;
889   }
890   SchedCandidate BotCand;
891   // Prefer bottom scheduling when heuristics are silent.
892   CandResult BotResult = pickNodeFromQueue(Bot,
893                                            DAG->getBotRPTracker(), BotCand);
894   assert(BotResult != NoCand && "failed to find the first candidate");
895 
896   // If either Q has a single candidate that provides the least increase in
897   // Excess pressure, we can immediately schedule from that Q.
898   //
899   // RegionCriticalPSets summarizes the pressure within the scheduled region and
900   // affects picking from either Q. If scheduling in one direction must
901   // increase pressure for one of the excess PSets, then schedule in that
902   // direction first to provide more freedom in the other direction.
903   if (BotResult == SingleExcess || BotResult == SingleCritical) {
904     LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
905     IsTopNode = false;
906     return BotCand.SU;
907   }
908   // Check if the top Q has a better candidate.
909   SchedCandidate TopCand;
910   CandResult TopResult = pickNodeFromQueue(Top,
911                                            DAG->getTopRPTracker(), TopCand);
912   assert(TopResult != NoCand && "failed to find the first candidate");
913 
914   if (TopResult == SingleExcess || TopResult == SingleCritical) {
915     LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
916     IsTopNode = true;
917     return TopCand.SU;
918   }
919   // If either Q has a single candidate that minimizes pressure above the
920   // original region's pressure pick it.
921   if (BotResult == SingleMax) {
922     LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
923     IsTopNode = false;
924     return BotCand.SU;
925   }
926   if (TopResult == SingleMax) {
927     LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
928     IsTopNode = true;
929     return TopCand.SU;
930   }
931   if (TopCand.SCost > BotCand.SCost) {
932     LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
933     IsTopNode = true;
934     return TopCand.SU;
935   }
936   // Otherwise prefer the bottom candidate in node order.
937   LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
938   IsTopNode = false;
939   return BotCand.SU;
940 }
941 
942 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
943 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
944   if (DAG->top() == DAG->bottom()) {
945     assert(Top.Available.empty() && Top.Pending.empty() &&
946            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
947     return nullptr;
948   }
949   SUnit *SU;
950   if (ForceTopDown) {
951     SU = Top.pickOnlyChoice();
952     if (!SU) {
953       SchedCandidate TopCand;
954       CandResult TopResult =
955         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
956       assert(TopResult != NoCand && "failed to find the first candidate");
957       (void)TopResult;
958       SU = TopCand.SU;
959     }
960     IsTopNode = true;
961   } else if (ForceBottomUp) {
962     SU = Bot.pickOnlyChoice();
963     if (!SU) {
964       SchedCandidate BotCand;
965       CandResult BotResult =
966         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
967       assert(BotResult != NoCand && "failed to find the first candidate");
968       (void)BotResult;
969       SU = BotCand.SU;
970     }
971     IsTopNode = false;
972   } else {
973     SU = pickNodeBidrectional(IsTopNode);
974   }
975   if (SU->isTopReady())
976     Top.removeReady(SU);
977   if (SU->isBottomReady())
978     Bot.removeReady(SU);
979 
980   LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
981                     << " Scheduling instruction in cycle "
982                     << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
983                     << reportPackets() << ")\n";
984              DAG->dumpNode(*SU));
985   return SU;
986 }
987 
988 /// Update the scheduler's state after scheduling a node. This is the same node
989 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
990 /// to update it's state based on the current cycle before MachineSchedStrategy
991 /// does.
992 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
993   if (IsTopNode) {
994     Top.bumpNode(SU);
995     SU->TopReadyCycle = Top.CurrCycle;
996   } else {
997     Bot.bumpNode(SU);
998     SU->BotReadyCycle = Bot.CurrCycle;
999   }
1000 }
1001