1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetMachine.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "scheduler"
34 
35 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
36   cl::ZeroOrMore, cl::init(false),
37   cl::desc("Disable use of DFA during scheduling"));
38 
39 static cl::opt<int> RegPressureThreshold(
40   "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
41   cl::desc("Track reg pressure and switch priority to in-depth"));
42 
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44     : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46   TRI = STI.getRegisterInfo();
47   TLI = IS->TLI;
48   TII = STI.getInstrInfo();
49   ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
50   // This hard requirement could be relaxed, but for now
51   // do not let it proceed.
52   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
53 
54   unsigned NumRC = TRI->getNumRegClasses();
55   RegLimit.resize(NumRC);
56   RegPressure.resize(NumRC);
57   std::fill(RegLimit.begin(), RegLimit.end(), 0);
58   std::fill(RegPressure.begin(), RegPressure.end(), 0);
59   for (const TargetRegisterClass *RC : TRI->regclasses())
60     RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
61 
62   ParallelLiveRanges = 0;
63   HorizontalVerticalBalance = 0;
64 }
65 
66 unsigned
67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68   unsigned NumberDeps = 0;
69   for (SDep &Pred : SU->Preds) {
70     if (Pred.isCtrl())
71       continue;
72 
73     SUnit *PredSU = Pred.getSUnit();
74     const SDNode *ScegN = PredSU->getNode();
75 
76     if (!ScegN)
77       continue;
78 
79     // If value is passed to CopyToReg, it is probably
80     // live outside BB.
81     switch (ScegN->getOpcode()) {
82       default:  break;
83       case ISD::TokenFactor:    break;
84       case ISD::CopyFromReg:    NumberDeps++;  break;
85       case ISD::CopyToReg:      break;
86       case ISD::INLINEASM:      break;
87     }
88     if (!ScegN->isMachineOpcode())
89       continue;
90 
91     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
92       MVT VT = ScegN->getSimpleValueType(i);
93       if (TLI->isTypeLegal(VT)
94           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
95         NumberDeps++;
96         break;
97       }
98     }
99   }
100   return NumberDeps;
101 }
102 
103 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
104                                                     unsigned RCId) {
105   unsigned NumberDeps = 0;
106   for (const SDep &Succ : SU->Succs) {
107     if (Succ.isCtrl())
108       continue;
109 
110     SUnit *SuccSU = Succ.getSUnit();
111     const SDNode *ScegN = SuccSU->getNode();
112     if (!ScegN)
113       continue;
114 
115     // If value is passed to CopyToReg, it is probably
116     // live outside BB.
117     switch (ScegN->getOpcode()) {
118       default:  break;
119       case ISD::TokenFactor:    break;
120       case ISD::CopyFromReg:    break;
121       case ISD::CopyToReg:      NumberDeps++;  break;
122       case ISD::INLINEASM:      break;
123     }
124     if (!ScegN->isMachineOpcode())
125       continue;
126 
127     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
128       const SDValue &Op = ScegN->getOperand(i);
129       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
130       if (TLI->isTypeLegal(VT)
131           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
132         NumberDeps++;
133         break;
134       }
135     }
136   }
137   return NumberDeps;
138 }
139 
140 static unsigned numberCtrlDepsInSU(SUnit *SU) {
141   unsigned NumberDeps = 0;
142   for (const SDep &Succ : SU->Succs)
143     if (Succ.isCtrl())
144       NumberDeps++;
145 
146   return NumberDeps;
147 }
148 
149 static unsigned numberCtrlPredInSU(SUnit *SU) {
150   unsigned NumberDeps = 0;
151   for (SDep &Pred : SU->Preds)
152     if (Pred.isCtrl())
153       NumberDeps++;
154 
155   return NumberDeps;
156 }
157 
158 ///
159 /// Initialize nodes.
160 ///
161 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
162   SUnits = &sunits;
163   NumNodesSolelyBlocking.resize(SUnits->size(), 0);
164 
165   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
166     SUnit *SU = &(*SUnits)[i];
167     initNumRegDefsLeft(SU);
168     SU->NodeQueueId = 0;
169   }
170 }
171 
172 /// This heuristic is used if DFA scheduling is not desired
173 /// for some VLIW platform.
174 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
175   // The isScheduleHigh flag allows nodes with wraparound dependencies that
176   // cannot easily be modeled as edges with latencies to be scheduled as
177   // soon as possible in a top-down schedule.
178   if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
179     return false;
180 
181   if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
182     return true;
183 
184   unsigned LHSNum = LHS->NodeNum;
185   unsigned RHSNum = RHS->NodeNum;
186 
187   // The most important heuristic is scheduling the critical path.
188   unsigned LHSLatency = PQ->getLatency(LHSNum);
189   unsigned RHSLatency = PQ->getLatency(RHSNum);
190   if (LHSLatency < RHSLatency) return true;
191   if (LHSLatency > RHSLatency) return false;
192 
193   // After that, if two nodes have identical latencies, look to see if one will
194   // unblock more other nodes than the other.
195   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
196   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
197   if (LHSBlocked < RHSBlocked) return true;
198   if (LHSBlocked > RHSBlocked) return false;
199 
200   // Finally, just to provide a stable ordering, use the node number as a
201   // deciding factor.
202   return LHSNum < RHSNum;
203 }
204 
205 
206 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
207 /// of SU, return it, otherwise return null.
208 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
209   SUnit *OnlyAvailablePred = nullptr;
210   for (const SDep &Pred : SU->Preds) {
211     SUnit &PredSU = *Pred.getSUnit();
212     if (!PredSU.isScheduled) {
213       // We found an available, but not scheduled, predecessor.  If it's the
214       // only one we have found, keep track of it... otherwise give up.
215       if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
216         return nullptr;
217       OnlyAvailablePred = &PredSU;
218     }
219   }
220   return OnlyAvailablePred;
221 }
222 
223 void ResourcePriorityQueue::push(SUnit *SU) {
224   // Look at all of the successors of this node.  Count the number of nodes that
225   // this node is the sole unscheduled node for.
226   unsigned NumNodesBlocking = 0;
227   for (const SDep &Succ : SU->Succs)
228     if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
229       ++NumNodesBlocking;
230 
231   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
232   Queue.push_back(SU);
233 }
234 
235 /// Check if scheduling of this SU is possible
236 /// in the current packet.
237 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
238   if (!SU || !SU->getNode())
239     return false;
240 
241   // If this is a compound instruction,
242   // it is likely to be a call. Do not delay it.
243   if (SU->getNode()->getGluedNode())
244     return true;
245 
246   // First see if the pipeline could receive this instruction
247   // in the current cycle.
248   if (SU->getNode()->isMachineOpcode())
249     switch (SU->getNode()->getMachineOpcode()) {
250     default:
251       if (!ResourcesModel->canReserveResources(&TII->get(
252           SU->getNode()->getMachineOpcode())))
253            return false;
254       break;
255     case TargetOpcode::EXTRACT_SUBREG:
256     case TargetOpcode::INSERT_SUBREG:
257     case TargetOpcode::SUBREG_TO_REG:
258     case TargetOpcode::REG_SEQUENCE:
259     case TargetOpcode::IMPLICIT_DEF:
260         break;
261     }
262 
263   // Now see if there are no other dependencies
264   // to instructions already in the packet.
265   for (unsigned i = 0, e = Packet.size(); i != e; ++i)
266     for (const SDep &Succ : Packet[i]->Succs) {
267       // Since we do not add pseudos to packets, might as well
268       // ignore order deps.
269       if (Succ.isCtrl())
270         continue;
271 
272       if (Succ.getSUnit() == SU)
273         return false;
274     }
275 
276   return true;
277 }
278 
279 /// Keep track of available resources.
280 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
281   // If this SU does not fit in the packet
282   // start a new one.
283   if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
284     ResourcesModel->clearResources();
285     Packet.clear();
286   }
287 
288   if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
289     switch (SU->getNode()->getMachineOpcode()) {
290     default:
291       ResourcesModel->reserveResources(&TII->get(
292         SU->getNode()->getMachineOpcode()));
293       break;
294     case TargetOpcode::EXTRACT_SUBREG:
295     case TargetOpcode::INSERT_SUBREG:
296     case TargetOpcode::SUBREG_TO_REG:
297     case TargetOpcode::REG_SEQUENCE:
298     case TargetOpcode::IMPLICIT_DEF:
299       break;
300     }
301     Packet.push_back(SU);
302   }
303   // Forcefully end packet for PseudoOps.
304   else {
305     ResourcesModel->clearResources();
306     Packet.clear();
307   }
308 
309   // If packet is now full, reset the state so in the next cycle
310   // we start fresh.
311   if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
312     ResourcesModel->clearResources();
313     Packet.clear();
314   }
315 }
316 
317 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
318   int RegBalance = 0;
319 
320   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
321     return RegBalance;
322 
323   // Gen estimate.
324   for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
325       MVT VT = SU->getNode()->getSimpleValueType(i);
326       if (TLI->isTypeLegal(VT)
327           && TLI->getRegClassFor(VT)
328           && TLI->getRegClassFor(VT)->getID() == RCId)
329         RegBalance += numberRCValSuccInSU(SU, RCId);
330   }
331   // Kill estimate.
332   for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
333       const SDValue &Op = SU->getNode()->getOperand(i);
334       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
335       if (isa<ConstantSDNode>(Op.getNode()))
336         continue;
337 
338       if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
339           && TLI->getRegClassFor(VT)->getID() == RCId)
340         RegBalance -= numberRCValPredInSU(SU, RCId);
341   }
342   return RegBalance;
343 }
344 
345 /// Estimates change in reg pressure from this SU.
346 /// It is achieved by trivial tracking of defined
347 /// and used vregs in dependent instructions.
348 /// The RawPressure flag makes this function to ignore
349 /// existing reg file sizes, and report raw def/use
350 /// balance.
351 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
352   int RegBalance = 0;
353 
354   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
355     return RegBalance;
356 
357   if (RawPressure) {
358     for (const TargetRegisterClass *RC : TRI->regclasses())
359       RegBalance += rawRegPressureDelta(SU, RC->getID());
360   }
361   else {
362     for (const TargetRegisterClass *RC : TRI->regclasses()) {
363       if ((RegPressure[RC->getID()] +
364            rawRegPressureDelta(SU, RC->getID()) > 0) &&
365           (RegPressure[RC->getID()] +
366            rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
367         RegBalance += rawRegPressureDelta(SU, RC->getID());
368     }
369   }
370 
371   return RegBalance;
372 }
373 
// Weights giving the relative importance of the individual heuristic
// components in the scheduling cost computation.
static constexpr unsigned PriorityOne = 200;
static constexpr unsigned PriorityTwo = 50;
static constexpr unsigned PriorityThree = 15;
static constexpr unsigned PriorityFour = 5;
static constexpr unsigned ScaleOne = 20;
static constexpr unsigned ScaleTwo = 10;
static constexpr unsigned ScaleThree = 5;
static constexpr unsigned FactorOne = 2;
384 
385 /// Returns single number reflecting benefit of scheduling SU
386 /// in the current cycle.
387 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
388   // Initial trivial priority.
389   int ResCount = 1;
390 
391   // Do not waste time on a node that is already scheduled.
392   if (SU->isScheduled)
393     return ResCount;
394 
395   // Forced priority is high.
396   if (SU->isScheduleHigh)
397     ResCount += PriorityOne;
398 
399   // Adaptable scheduling
400   // A small, but very parallel
401   // region, where reg pressure is an issue.
402   if (HorizontalVerticalBalance > RegPressureThreshold) {
403     // Critical path first
404     ResCount += (SU->getHeight() * ScaleTwo);
405     // If resources are available for it, multiply the
406     // chance of scheduling.
407     if (isResourceAvailable(SU))
408       ResCount <<= FactorOne;
409 
410     // Consider change to reg pressure from scheduling
411     // this SU.
412     ResCount -= (regPressureDelta(SU,true) * ScaleOne);
413   }
414   // Default heuristic, greeady and
415   // critical path driven.
416   else {
417     // Critical path first.
418     ResCount += (SU->getHeight() * ScaleTwo);
419     // Now see how many instructions is blocked by this SU.
420     ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
421     // If resources are available for it, multiply the
422     // chance of scheduling.
423     if (isResourceAvailable(SU))
424       ResCount <<= FactorOne;
425 
426     ResCount -= (regPressureDelta(SU) * ScaleTwo);
427   }
428 
429   // These are platform-specific things.
430   // Will need to go into the back end
431   // and accessed from here via a hook.
432   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
433     if (N->isMachineOpcode()) {
434       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
435       if (TID.isCall())
436         ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
437     }
438     else
439       switch (N->getOpcode()) {
440       default:  break;
441       case ISD::TokenFactor:
442       case ISD::CopyFromReg:
443       case ISD::CopyToReg:
444         ResCount += PriorityFour;
445         break;
446 
447       case ISD::INLINEASM:
448         ResCount += PriorityThree;
449         break;
450       }
451   }
452   return ResCount;
453 }
454 
455 
456 /// Main resource tracking point.
457 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
458   // Use NULL entry as an event marker to reset
459   // the DFA state.
460   if (!SU) {
461     ResourcesModel->clearResources();
462     Packet.clear();
463     return;
464   }
465 
466   const SDNode *ScegN = SU->getNode();
467   // Update reg pressure tracking.
468   // First update current node.
469   if (ScegN->isMachineOpcode()) {
470     // Estimate generated regs.
471     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
472       MVT VT = ScegN->getSimpleValueType(i);
473 
474       if (TLI->isTypeLegal(VT)) {
475         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
476         if (RC)
477           RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
478       }
479     }
480     // Estimate killed regs.
481     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
482       const SDValue &Op = ScegN->getOperand(i);
483       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
484 
485       if (TLI->isTypeLegal(VT)) {
486         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
487         if (RC) {
488           if (RegPressure[RC->getID()] >
489             (numberRCValPredInSU(SU, RC->getID())))
490             RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
491           else RegPressure[RC->getID()] = 0;
492         }
493       }
494     }
495     for (SDep &Pred : SU->Preds) {
496       if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
497         continue;
498       --Pred.getSUnit()->NumRegDefsLeft;
499     }
500   }
501 
502   // Reserve resources for this SU.
503   reserveResources(SU);
504 
505   // Adjust number of parallel live ranges.
506   // Heuristic is simple - node with no data successors reduces
507   // number of live ranges. All others, increase it.
508   unsigned NumberNonControlDeps = 0;
509 
510   for (const SDep &Succ : SU->Succs) {
511     adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
512     if (!Succ.isCtrl())
513       NumberNonControlDeps++;
514   }
515 
516   if (!NumberNonControlDeps) {
517     if (ParallelLiveRanges >= SU->NumPreds)
518       ParallelLiveRanges -= SU->NumPreds;
519     else
520       ParallelLiveRanges = 0;
521 
522   }
523   else
524     ParallelLiveRanges += SU->NumRegDefsLeft;
525 
526   // Track parallel live chains.
527   HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
528   HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
529 }
530 
531 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
532   unsigned  NodeNumDefs = 0;
533   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
534     if (N->isMachineOpcode()) {
535       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
536       // No register need be allocated for this.
537       if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
538         NodeNumDefs = 0;
539         break;
540       }
541       NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
542     }
543     else
544       switch(N->getOpcode()) {
545         default:     break;
546         case ISD::CopyFromReg:
547           NodeNumDefs++;
548           break;
549         case ISD::INLINEASM:
550           NodeNumDefs++;
551           break;
552       }
553 
554   SU->NumRegDefsLeft = NodeNumDefs;
555 }
556 
557 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
558 /// scheduled.  If SU is not itself available, then there is at least one
559 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
560 /// unscheduled predecessor, we want to increase its priority: it getting
561 /// scheduled will make this node available, so it is better than some other
562 /// node of the same priority that will not make a node available.
563 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
564   if (SU->isAvailable) return;  // All preds scheduled.
565 
566   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
567   if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
568     return;
569 
570   // Okay, we found a single predecessor that is available, but not scheduled.
571   // Since it is available, it must be in the priority queue.  First remove it.
572   remove(OnlyAvailablePred);
573 
574   // Reinsert the node into the priority queue, which recomputes its
575   // NumNodesSolelyBlocking value.
576   push(OnlyAvailablePred);
577 }
578 
579 
580 /// Main access point - returns next instructions
581 /// to be placed in scheduling sequence.
582 SUnit *ResourcePriorityQueue::pop() {
583   if (empty())
584     return nullptr;
585 
586   std::vector<SUnit *>::iterator Best = Queue.begin();
587   if (!DisableDFASched) {
588     int BestCost = SUSchedulingCost(*Best);
589     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
590 
591       if (SUSchedulingCost(*I) > BestCost) {
592         BestCost = SUSchedulingCost(*I);
593         Best = I;
594       }
595     }
596   }
597   // Use default TD scheduling mechanism.
598   else {
599     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
600       if (Picker(*Best, *I))
601         Best = I;
602   }
603 
604   SUnit *V = *Best;
605   if (Best != std::prev(Queue.end()))
606     std::swap(*Best, Queue.back());
607 
608   Queue.pop_back();
609 
610   return V;
611 }
612 
613 
614 void ResourcePriorityQueue::remove(SUnit *SU) {
615   assert(!Queue.empty() && "Queue is empty!");
616   std::vector<SUnit *>::iterator I = find(Queue, SU);
617   if (I != std::prev(Queue.end()))
618     std::swap(*I, Queue.back());
619 
620   Queue.pop_back();
621 }
622