1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
26 // A block that has only one predecessor continues scheduling with the state
27 // of it (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                    cl::desc("The OOO window for processor "
43                                             "resources during scheduling."),
44                                    cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   if (SC->BeginGroup) {
53     if (!SC->EndGroup)
54       return 2; // Cracked instruction
55     else
56       return 3; // Expanded/group-alone instruction
57   }
58 
59   return 1; // Normal instruction
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx() const {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66   return Idx;
67 }
68 
69 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
70 getHazardType(SUnit *m, int Stalls) {
71   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
72 }
73 
74 void SystemZHazardRecognizer::Reset() {
75   CurrGroupSize = 0;
76   clearProcResCounters();
77   GrpCount = 0;
78   LastFPdOpCycleIdx = UINT_MAX;
79   LastEmittedMI = nullptr;
80   DEBUG(CurGroupDbg = "";);
81 }
82 
83 bool
84 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
85   const MCSchedClassDesc *SC = getSchedClass(SU);
86   if (!SC->isValid())
87     return true;
88 
89   // A cracked instruction only fits into schedule if the current
90   // group is empty.
91   if (SC->BeginGroup)
92     return (CurrGroupSize == 0);
93 
94   // Since a full group is handled immediately in EmitInstruction(),
95   // SU should fit into current group. NumSlots should be 1 or 0,
96   // since it is not a cracked or expanded instruction.
97   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
98           "Expected normal instruction to fit in non-full group!");
99 
100   return true;
101 }
102 
103 void SystemZHazardRecognizer::nextGroup() {
104   if (CurrGroupSize == 0)
105     return;
106 
107   DEBUG(dumpCurrGroup("Completed decode group"));
108   DEBUG(CurGroupDbg = "";);
109 
110   GrpCount++;
111 
112   // Reset counter for next group.
113   CurrGroupSize = 0;
114 
115   // Decrease counters for execution units by one.
116   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
117     if (ProcResourceCounters[i] > 0)
118       ProcResourceCounters[i]--;
119 
120   // Clear CriticalResourceIdx if it is now below the threshold.
121   if (CriticalResourceIdx != UINT_MAX &&
122       (ProcResourceCounters[CriticalResourceIdx] <=
123        ProcResCostLim))
124     CriticalResourceIdx = UINT_MAX;
125 
126   DEBUG(dumpState(););
127 }
128 
129 #ifndef NDEBUG // Debug output
130 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
131   OS << "SU(" << SU->NodeNum << "):";
132   OS << TII->getName(SU->getInstr()->getOpcode());
133 
134   const MCSchedClassDesc *SC = getSchedClass(SU);
135   if (!SC->isValid())
136     return;
137 
138   for (TargetSchedModel::ProcResIter
139          PI = SchedModel->getWriteProcResBegin(SC),
140          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
141     const MCProcResourceDesc &PRD =
142       *SchedModel->getProcResource(PI->ProcResourceIdx);
143     std::string FU(PRD.Name);
144     // trim e.g. Z13_FXaUnit -> FXa
145     FU = FU.substr(FU.find("_") + 1);
146     FU.resize(FU.find("Unit"));
147     OS << "/" << FU;
148 
149     if (PI->Cycles > 1)
150       OS << "(" << PI->Cycles << "cyc)";
151   }
152 
153   if (SC->NumMicroOps > 1)
154     OS << "/" << SC->NumMicroOps << "uops";
155   if (SC->BeginGroup && SC->EndGroup)
156     OS << "/GroupsAlone";
157   else if (SC->BeginGroup)
158     OS << "/BeginsGroup";
159   else if (SC->EndGroup)
160     OS << "/EndsGroup";
161   if (SU->isUnbuffered)
162     OS << "/Unbuffered";
163 }
164 
165 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
166   dbgs() << "++ " << Msg;
167   dbgs() << ": ";
168 
169   if (CurGroupDbg.empty())
170     dbgs() << " <empty>\n";
171   else {
172     dbgs() << "{ " << CurGroupDbg << " }";
173     dbgs() << " (" << CurrGroupSize << " decoder slot"
174            << (CurrGroupSize > 1 ? "s":"")
175            << ")\n";
176   }
177 }
178 
179 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
180   bool any = false;
181 
182   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
183     if (ProcResourceCounters[i] > 0) {
184       any = true;
185       break;
186     }
187 
188   if (!any)
189     return;
190 
191   dbgs() << "++ | Resource counters: ";
192   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
193     if (ProcResourceCounters[i] > 0)
194       dbgs() << SchedModel->getProcResource(i)->Name
195              << ":" << ProcResourceCounters[i] << " ";
196   dbgs() << "\n";
197 
198   if (CriticalResourceIdx != UINT_MAX)
199     dbgs() << "++ | Critical resource: "
200            << SchedModel->getProcResource(CriticalResourceIdx)->Name
201            << "\n";
202 }
203 
204 void SystemZHazardRecognizer::dumpState() const {
205   dumpCurrGroup("| Current decoder group");
206   dbgs() << "++ | Current cycle index: "
207          << getCurrCycleIdx() << "\n";
208   dumpProcResourceCounters();
209   if (LastFPdOpCycleIdx != UINT_MAX)
210     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
211 }
212 
213 #endif //NDEBUG
214 
215 void SystemZHazardRecognizer::clearProcResCounters() {
216   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
217   CriticalResourceIdx = UINT_MAX;
218 }
219 
220 static inline bool isBranchRetTrap(MachineInstr *MI) {
221   return (MI->isBranch() || MI->isReturn() ||
222           MI->getOpcode() == SystemZ::CondTrap);
223 }
224 
225 // Update state with SU as the next scheduled unit.
226 void SystemZHazardRecognizer::
227 EmitInstruction(SUnit *SU) {
228   const MCSchedClassDesc *SC = getSchedClass(SU);
229   DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
230         dbgs() << "\n";);
231   DEBUG(dumpCurrGroup("Decode group before emission"););
232 
233   // If scheduling an SU that must begin a new decoder group, move on
234   // to next group.
235   if (!fitsIntoCurrentGroup(SU))
236     nextGroup();
237 
238   DEBUG(raw_string_ostream cgd(CurGroupDbg);
239         if (CurGroupDbg.length())
240           cgd << ", ";
241         dumpSU(SU, cgd););
242 
243   LastEmittedMI = SU->getInstr();
244 
245   // After returning from a call, we don't know much about the state.
246   if (SU->isCall) {
247     DEBUG(dbgs() << "++ Clearing state after call.\n";);
248     clearProcResCounters();
249     LastFPdOpCycleIdx = UINT_MAX;
250     CurrGroupSize += getNumDecoderSlots(SU);
251     assert (CurrGroupSize <= 3);
252     nextGroup();
253     return;
254   }
255 
256   // Increase counter for execution unit(s).
257   for (TargetSchedModel::ProcResIter
258          PI = SchedModel->getWriteProcResBegin(SC),
259          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
260     // Don't handle FPd together with the other resources.
261     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
262       continue;
263     int &CurrCounter =
264       ProcResourceCounters[PI->ProcResourceIdx];
265     CurrCounter += PI->Cycles;
266     // Check if this is now the new critical resource.
267     if ((CurrCounter > ProcResCostLim) &&
268         (CriticalResourceIdx == UINT_MAX ||
269          (PI->ProcResourceIdx != CriticalResourceIdx &&
270           CurrCounter >
271           ProcResourceCounters[CriticalResourceIdx]))) {
272       DEBUG(dbgs() << "++ New critical resource: "
273             << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
274             << "\n";);
275       CriticalResourceIdx = PI->ProcResourceIdx;
276     }
277   }
278 
279   // Make note of an instruction that uses a blocking resource (FPd).
280   if (SU->isUnbuffered) {
281     LastFPdOpCycleIdx = getCurrCycleIdx();
282     DEBUG(dbgs() << "++ Last FPd cycle index: "
283           << LastFPdOpCycleIdx << "\n";);
284   }
285 
286   // Insert SU into current group by increasing number of slots used
287   // in current group.
288   CurrGroupSize += getNumDecoderSlots(SU);
289   assert (CurrGroupSize <= 3);
290 
291   // Check if current group is now full/ended. If so, move on to next
292   // group to be ready to evaluate more candidates.
293   if (CurrGroupSize == 3 || SC->EndGroup)
294     nextGroup();
295 }
296 
297 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
298   const MCSchedClassDesc *SC = getSchedClass(SU);
299   if (!SC->isValid())
300     return 0;
301 
302   // If SU begins new group, it can either break a current group early
303   // or fit naturally if current group is empty (negative cost).
304   if (SC->BeginGroup) {
305     if (CurrGroupSize)
306       return 3 - CurrGroupSize;
307     return -1;
308   }
309 
310   // Similarly, a group-ending SU may either fit well (last in group), or
311   // end the group prematurely.
312   if (SC->EndGroup) {
313     unsigned resultingGroupSize =
314       (CurrGroupSize + getNumDecoderSlots(SU));
315     if (resultingGroupSize < 3)
316       return (3 - resultingGroupSize);
317     return -1;
318   }
319 
320   // Most instructions can be placed in any decoder slot.
321   return 0;
322 }
323 
324 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
325   assert (SU->isUnbuffered);
326   // If this is the first FPd op, it should be scheduled high.
327   if (LastFPdOpCycleIdx == UINT_MAX)
328     return true;
329   // If this is not the first PFd op, it should go into the other side
330   // of the processor to use the other FPd unit there. This should
331   // generally happen if two FPd ops are placed with 2 other
332   // instructions between them (modulo 6).
333   if (LastFPdOpCycleIdx > getCurrCycleIdx())
334     return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
335   return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
336 }
337 
338 int SystemZHazardRecognizer::
339 resourcesCost(SUnit *SU) {
340   int Cost = 0;
341 
342   const MCSchedClassDesc *SC = getSchedClass(SU);
343   if (!SC->isValid())
344     return 0;
345 
346   // For a FPd op, either return min or max value as indicated by the
347   // distance to any prior FPd op.
348   if (SU->isUnbuffered)
349     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
350   // For other instructions, give a cost to the use of the critical resource.
351   else if (CriticalResourceIdx != UINT_MAX) {
352     for (TargetSchedModel::ProcResIter
353            PI = SchedModel->getWriteProcResBegin(SC),
354            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
355       if (PI->ProcResourceIdx == CriticalResourceIdx)
356         Cost = PI->Cycles;
357   }
358 
359   return Cost;
360 }
361 
362 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
363                                               bool TakenBranch) {
364   // Make a temporary SUnit.
365   SUnit SU(MI, 0);
366 
367   // Set interesting flags.
368   SU.isCall = MI->isCall();
369 
370   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
371   for (const MCWriteProcResEntry &PRE :
372          make_range(SchedModel->getWriteProcResBegin(SC),
373                     SchedModel->getWriteProcResEnd(SC))) {
374     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
375     case 0:
376       SU.hasReservedResource = true;
377       break;
378     case 1:
379       SU.isUnbuffered = true;
380       break;
381     default:
382       break;
383     }
384   }
385 
386   unsigned GroupSizeBeforeEmit = CurrGroupSize;
387   EmitInstruction(&SU);
388 
389   if (!TakenBranch && isBranchRetTrap(MI)) {
390     // NT Branch on second slot ends group.
391     if (GroupSizeBeforeEmit == 1)
392       nextGroup();
393   }
394 
395   if (TakenBranch && CurrGroupSize > 0)
396     nextGroup();
397 
398   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
399           "Scheduler: unhandled terminator!");
400 }
401 
402 void SystemZHazardRecognizer::
403 copyState(SystemZHazardRecognizer *Incoming) {
404   // Current decoder group
405   CurrGroupSize = Incoming->CurrGroupSize;
406   DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
407 
408   // Processor resources
409   ProcResourceCounters = Incoming->ProcResourceCounters;
410   CriticalResourceIdx = Incoming->CriticalResourceIdx;
411 
412   // FPd
413   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
414   GrpCount = Incoming->GrpCount;
415 }
416