1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ResourcePriorityQueue class, which is a
11 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
12 // reduce the length of the critical path through the basic block
13 // on VLIW platforms.
14 // The scheduler is basically a top-down adaptable list scheduler with DFA
15 // resource tracking added to the cost function.
16 // DFA is queried as a state machine to model "packets/bundles" during
17 // schedule. Currently packets/bundles are discarded at the end of
18 // scheduling, affecting only order of instructions.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/CodeGen/ResourcePriorityQueue.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/SelectionDAGNodes.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetLowering.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetSubtargetInfo.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "scheduler"
35 
36 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
37   cl::ZeroOrMore, cl::init(false),
38   cl::desc("Disable use of DFA during scheduling"));
39 
40 static cl::opt<int> RegPressureThreshold(
41   "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
42   cl::desc("Track reg pressure and switch priority to in-depth"));
43 
44 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
45     : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
46   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
47   TRI = STI.getRegisterInfo();
48   TLI = IS->TLI;
49   TII = STI.getInstrInfo();
50   ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
51   // This hard requirement could be relaxed, but for now
52   // do not let it proceed.
53   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
54 
55   unsigned NumRC = TRI->getNumRegClasses();
56   RegLimit.resize(NumRC);
57   RegPressure.resize(NumRC);
58   std::fill(RegLimit.begin(), RegLimit.end(), 0);
59   std::fill(RegPressure.begin(), RegPressure.end(), 0);
60   for (const TargetRegisterClass *RC : TRI->regclasses())
61     RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
62 
63   ParallelLiveRanges = 0;
64   HorizontalVerticalBalance = 0;
65 }
66 
67 unsigned
68 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
69   unsigned NumberDeps = 0;
70   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
71        I != E; ++I) {
72     if (I->isCtrl())
73       continue;
74 
75     SUnit *PredSU = I->getSUnit();
76     const SDNode *ScegN = PredSU->getNode();
77 
78     if (!ScegN)
79       continue;
80 
81     // If value is passed to CopyToReg, it is probably
82     // live outside BB.
83     switch (ScegN->getOpcode()) {
84       default:  break;
85       case ISD::TokenFactor:    break;
86       case ISD::CopyFromReg:    NumberDeps++;  break;
87       case ISD::CopyToReg:      break;
88       case ISD::INLINEASM:      break;
89     }
90     if (!ScegN->isMachineOpcode())
91       continue;
92 
93     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
94       MVT VT = ScegN->getSimpleValueType(i);
95       if (TLI->isTypeLegal(VT)
96           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
97         NumberDeps++;
98         break;
99       }
100     }
101   }
102   return NumberDeps;
103 }
104 
105 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
106                                                     unsigned RCId) {
107   unsigned NumberDeps = 0;
108   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
109        I != E; ++I) {
110     if (I->isCtrl())
111       continue;
112 
113     SUnit *SuccSU = I->getSUnit();
114     const SDNode *ScegN = SuccSU->getNode();
115     if (!ScegN)
116       continue;
117 
118     // If value is passed to CopyToReg, it is probably
119     // live outside BB.
120     switch (ScegN->getOpcode()) {
121       default:  break;
122       case ISD::TokenFactor:    break;
123       case ISD::CopyFromReg:    break;
124       case ISD::CopyToReg:      NumberDeps++;  break;
125       case ISD::INLINEASM:      break;
126     }
127     if (!ScegN->isMachineOpcode())
128       continue;
129 
130     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
131       const SDValue &Op = ScegN->getOperand(i);
132       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
133       if (TLI->isTypeLegal(VT)
134           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
135         NumberDeps++;
136         break;
137       }
138     }
139   }
140   return NumberDeps;
141 }
142 
143 static unsigned numberCtrlDepsInSU(SUnit *SU) {
144   unsigned NumberDeps = 0;
145   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
146        I != E; ++I)
147     if (I->isCtrl())
148       NumberDeps++;
149 
150   return NumberDeps;
151 }
152 
153 static unsigned numberCtrlPredInSU(SUnit *SU) {
154   unsigned NumberDeps = 0;
155   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
156        I != E; ++I)
157     if (I->isCtrl())
158       NumberDeps++;
159 
160   return NumberDeps;
161 }
162 
163 ///
164 /// Initialize nodes.
165 ///
166 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
167   SUnits = &sunits;
168   NumNodesSolelyBlocking.resize(SUnits->size(), 0);
169 
170   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
171     SUnit *SU = &(*SUnits)[i];
172     initNumRegDefsLeft(SU);
173     SU->NodeQueueId = 0;
174   }
175 }
176 
177 /// This heuristic is used if DFA scheduling is not desired
178 /// for some VLIW platform.
179 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
180   // The isScheduleHigh flag allows nodes with wraparound dependencies that
181   // cannot easily be modeled as edges with latencies to be scheduled as
182   // soon as possible in a top-down schedule.
183   if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
184     return false;
185 
186   if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
187     return true;
188 
189   unsigned LHSNum = LHS->NodeNum;
190   unsigned RHSNum = RHS->NodeNum;
191 
192   // The most important heuristic is scheduling the critical path.
193   unsigned LHSLatency = PQ->getLatency(LHSNum);
194   unsigned RHSLatency = PQ->getLatency(RHSNum);
195   if (LHSLatency < RHSLatency) return true;
196   if (LHSLatency > RHSLatency) return false;
197 
198   // After that, if two nodes have identical latencies, look to see if one will
199   // unblock more other nodes than the other.
200   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
201   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
202   if (LHSBlocked < RHSBlocked) return true;
203   if (LHSBlocked > RHSBlocked) return false;
204 
205   // Finally, just to provide a stable ordering, use the node number as a
206   // deciding factor.
207   return LHSNum < RHSNum;
208 }
209 
210 
211 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
212 /// of SU, return it, otherwise return null.
213 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
214   SUnit *OnlyAvailablePred = nullptr;
215   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
216        I != E; ++I) {
217     SUnit &Pred = *I->getSUnit();
218     if (!Pred.isScheduled) {
219       // We found an available, but not scheduled, predecessor.  If it's the
220       // only one we have found, keep track of it... otherwise give up.
221       if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
222         return nullptr;
223       OnlyAvailablePred = &Pred;
224     }
225   }
226   return OnlyAvailablePred;
227 }
228 
229 void ResourcePriorityQueue::push(SUnit *SU) {
230   // Look at all of the successors of this node.  Count the number of nodes that
231   // this node is the sole unscheduled node for.
232   unsigned NumNodesBlocking = 0;
233   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
234        I != E; ++I)
235     if (getSingleUnscheduledPred(I->getSUnit()) == SU)
236       ++NumNodesBlocking;
237 
238   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
239   Queue.push_back(SU);
240 }
241 
242 /// Check if scheduling of this SU is possible
243 /// in the current packet.
244 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
245   if (!SU || !SU->getNode())
246     return false;
247 
248   // If this is a compound instruction,
249   // it is likely to be a call. Do not delay it.
250   if (SU->getNode()->getGluedNode())
251     return true;
252 
253   // First see if the pipeline could receive this instruction
254   // in the current cycle.
255   if (SU->getNode()->isMachineOpcode())
256     switch (SU->getNode()->getMachineOpcode()) {
257     default:
258       if (!ResourcesModel->canReserveResources(&TII->get(
259           SU->getNode()->getMachineOpcode())))
260            return false;
261     case TargetOpcode::EXTRACT_SUBREG:
262     case TargetOpcode::INSERT_SUBREG:
263     case TargetOpcode::SUBREG_TO_REG:
264     case TargetOpcode::REG_SEQUENCE:
265     case TargetOpcode::IMPLICIT_DEF:
266         break;
267     }
268 
269   // Now see if there are no other dependencies
270   // to instructions already in the packet.
271   for (unsigned i = 0, e = Packet.size(); i != e; ++i)
272     for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
273          E = Packet[i]->Succs.end(); I != E; ++I) {
274       // Since we do not add pseudos to packets, might as well
275       // ignore order deps.
276       if (I->isCtrl())
277         continue;
278 
279       if (I->getSUnit() == SU)
280         return false;
281     }
282 
283   return true;
284 }
285 
286 /// Keep track of available resources.
287 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
288   // If this SU does not fit in the packet
289   // start a new one.
290   if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
291     ResourcesModel->clearResources();
292     Packet.clear();
293   }
294 
295   if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
296     switch (SU->getNode()->getMachineOpcode()) {
297     default:
298       ResourcesModel->reserveResources(&TII->get(
299         SU->getNode()->getMachineOpcode()));
300       break;
301     case TargetOpcode::EXTRACT_SUBREG:
302     case TargetOpcode::INSERT_SUBREG:
303     case TargetOpcode::SUBREG_TO_REG:
304     case TargetOpcode::REG_SEQUENCE:
305     case TargetOpcode::IMPLICIT_DEF:
306       break;
307     }
308     Packet.push_back(SU);
309   }
310   // Forcefully end packet for PseudoOps.
311   else {
312     ResourcesModel->clearResources();
313     Packet.clear();
314   }
315 
316   // If packet is now full, reset the state so in the next cycle
317   // we start fresh.
318   if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
319     ResourcesModel->clearResources();
320     Packet.clear();
321   }
322 }
323 
324 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
325   int RegBalance = 0;
326 
327   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
328     return RegBalance;
329 
330   // Gen estimate.
331   for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
332       MVT VT = SU->getNode()->getSimpleValueType(i);
333       if (TLI->isTypeLegal(VT)
334           && TLI->getRegClassFor(VT)
335           && TLI->getRegClassFor(VT)->getID() == RCId)
336         RegBalance += numberRCValSuccInSU(SU, RCId);
337   }
338   // Kill estimate.
339   for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
340       const SDValue &Op = SU->getNode()->getOperand(i);
341       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
342       if (isa<ConstantSDNode>(Op.getNode()))
343         continue;
344 
345       if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
346           && TLI->getRegClassFor(VT)->getID() == RCId)
347         RegBalance -= numberRCValPredInSU(SU, RCId);
348   }
349   return RegBalance;
350 }
351 
352 /// Estimates change in reg pressure from this SU.
353 /// It is achieved by trivial tracking of defined
354 /// and used vregs in dependent instructions.
355 /// The RawPressure flag makes this function to ignore
356 /// existing reg file sizes, and report raw def/use
357 /// balance.
358 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
359   int RegBalance = 0;
360 
361   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
362     return RegBalance;
363 
364   if (RawPressure) {
365     for (const TargetRegisterClass *RC : TRI->regclasses())
366       RegBalance += rawRegPressureDelta(SU, RC->getID());
367   }
368   else {
369     for (const TargetRegisterClass *RC : TRI->regclasses()) {
370       if ((RegPressure[RC->getID()] +
371            rawRegPressureDelta(SU, RC->getID()) > 0) &&
372           (RegPressure[RC->getID()] +
373            rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
374         RegBalance += rawRegPressureDelta(SU, RC->getID());
375     }
376   }
377 
378   return RegBalance;
379 }
380 
381 // Constants used to denote relative importance of
382 // heuristic components for cost computation.
383 static const unsigned PriorityOne = 200;
384 static const unsigned PriorityTwo = 50;
385 static const unsigned PriorityThree = 15;
386 static const unsigned PriorityFour = 5;
387 static const unsigned ScaleOne = 20;
388 static const unsigned ScaleTwo = 10;
389 static const unsigned ScaleThree = 5;
390 static const unsigned FactorOne = 2;
391 
392 /// Returns single number reflecting benefit of scheduling SU
393 /// in the current cycle.
394 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
395   // Initial trivial priority.
396   int ResCount = 1;
397 
398   // Do not waste time on a node that is already scheduled.
399   if (SU->isScheduled)
400     return ResCount;
401 
402   // Forced priority is high.
403   if (SU->isScheduleHigh)
404     ResCount += PriorityOne;
405 
406   // Adaptable scheduling
407   // A small, but very parallel
408   // region, where reg pressure is an issue.
409   if (HorizontalVerticalBalance > RegPressureThreshold) {
410     // Critical path first
411     ResCount += (SU->getHeight() * ScaleTwo);
412     // If resources are available for it, multiply the
413     // chance of scheduling.
414     if (isResourceAvailable(SU))
415       ResCount <<= FactorOne;
416 
417     // Consider change to reg pressure from scheduling
418     // this SU.
419     ResCount -= (regPressureDelta(SU,true) * ScaleOne);
420   }
421   // Default heuristic, greeady and
422   // critical path driven.
423   else {
424     // Critical path first.
425     ResCount += (SU->getHeight() * ScaleTwo);
426     // Now see how many instructions is blocked by this SU.
427     ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
428     // If resources are available for it, multiply the
429     // chance of scheduling.
430     if (isResourceAvailable(SU))
431       ResCount <<= FactorOne;
432 
433     ResCount -= (regPressureDelta(SU) * ScaleTwo);
434   }
435 
436   // These are platform-specific things.
437   // Will need to go into the back end
438   // and accessed from here via a hook.
439   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
440     if (N->isMachineOpcode()) {
441       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
442       if (TID.isCall())
443         ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
444     }
445     else
446       switch (N->getOpcode()) {
447       default:  break;
448       case ISD::TokenFactor:
449       case ISD::CopyFromReg:
450       case ISD::CopyToReg:
451         ResCount += PriorityFour;
452         break;
453 
454       case ISD::INLINEASM:
455         ResCount += PriorityThree;
456         break;
457       }
458   }
459   return ResCount;
460 }
461 
462 
463 /// Main resource tracking point.
464 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
465   // Use NULL entry as an event marker to reset
466   // the DFA state.
467   if (!SU) {
468     ResourcesModel->clearResources();
469     Packet.clear();
470     return;
471   }
472 
473   const SDNode *ScegN = SU->getNode();
474   // Update reg pressure tracking.
475   // First update current node.
476   if (ScegN->isMachineOpcode()) {
477     // Estimate generated regs.
478     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
479       MVT VT = ScegN->getSimpleValueType(i);
480 
481       if (TLI->isTypeLegal(VT)) {
482         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
483         if (RC)
484           RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
485       }
486     }
487     // Estimate killed regs.
488     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
489       const SDValue &Op = ScegN->getOperand(i);
490       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
491 
492       if (TLI->isTypeLegal(VT)) {
493         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
494         if (RC) {
495           if (RegPressure[RC->getID()] >
496             (numberRCValPredInSU(SU, RC->getID())))
497             RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
498           else RegPressure[RC->getID()] = 0;
499         }
500       }
501     }
502     for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
503                               I != E; ++I) {
504       if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
505         continue;
506       --I->getSUnit()->NumRegDefsLeft;
507     }
508   }
509 
510   // Reserve resources for this SU.
511   reserveResources(SU);
512 
513   // Adjust number of parallel live ranges.
514   // Heuristic is simple - node with no data successors reduces
515   // number of live ranges. All others, increase it.
516   unsigned NumberNonControlDeps = 0;
517 
518   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
519                                   I != E; ++I) {
520     adjustPriorityOfUnscheduledPreds(I->getSUnit());
521     if (!I->isCtrl())
522       NumberNonControlDeps++;
523   }
524 
525   if (!NumberNonControlDeps) {
526     if (ParallelLiveRanges >= SU->NumPreds)
527       ParallelLiveRanges -= SU->NumPreds;
528     else
529       ParallelLiveRanges = 0;
530 
531   }
532   else
533     ParallelLiveRanges += SU->NumRegDefsLeft;
534 
535   // Track parallel live chains.
536   HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
537   HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
538 }
539 
540 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
541   unsigned  NodeNumDefs = 0;
542   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
543     if (N->isMachineOpcode()) {
544       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
545       // No register need be allocated for this.
546       if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
547         NodeNumDefs = 0;
548         break;
549       }
550       NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
551     }
552     else
553       switch(N->getOpcode()) {
554         default:     break;
555         case ISD::CopyFromReg:
556           NodeNumDefs++;
557           break;
558         case ISD::INLINEASM:
559           NodeNumDefs++;
560           break;
561       }
562 
563   SU->NumRegDefsLeft = NodeNumDefs;
564 }
565 
566 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
567 /// scheduled.  If SU is not itself available, then there is at least one
568 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
569 /// unscheduled predecessor, we want to increase its priority: it getting
570 /// scheduled will make this node available, so it is better than some other
571 /// node of the same priority that will not make a node available.
572 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
573   if (SU->isAvailable) return;  // All preds scheduled.
574 
575   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
576   if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
577     return;
578 
579   // Okay, we found a single predecessor that is available, but not scheduled.
580   // Since it is available, it must be in the priority queue.  First remove it.
581   remove(OnlyAvailablePred);
582 
583   // Reinsert the node into the priority queue, which recomputes its
584   // NumNodesSolelyBlocking value.
585   push(OnlyAvailablePred);
586 }
587 
588 
589 /// Main access point - returns next instructions
590 /// to be placed in scheduling sequence.
591 SUnit *ResourcePriorityQueue::pop() {
592   if (empty())
593     return nullptr;
594 
595   std::vector<SUnit *>::iterator Best = Queue.begin();
596   if (!DisableDFASched) {
597     int BestCost = SUSchedulingCost(*Best);
598     for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
599            E = Queue.end(); I != E; ++I) {
600 
601       if (SUSchedulingCost(*I) > BestCost) {
602         BestCost = SUSchedulingCost(*I);
603         Best = I;
604       }
605     }
606   }
607   // Use default TD scheduling mechanism.
608   else {
609     for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
610        E = Queue.end(); I != E; ++I)
611       if (Picker(*Best, *I))
612         Best = I;
613   }
614 
615   SUnit *V = *Best;
616   if (Best != std::prev(Queue.end()))
617     std::swap(*Best, Queue.back());
618 
619   Queue.pop_back();
620 
621   return V;
622 }
623 
624 
625 void ResourcePriorityQueue::remove(SUnit *SU) {
626   assert(!Queue.empty() && "Queue is empty!");
627   std::vector<SUnit *>::iterator I = find(Queue, SU);
628   if (I != std::prev(Queue.end()))
629     std::swap(*I, Queue.back());
630 
631   Queue.pop_back();
632 }
633