1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
26 // A block that has only one predecessor continues scheduling with the state
27 // of it (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                    cl::desc("The OOO window for processor "
43                                             "resources during scheduling."),
44                                    cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   if (SC->BeginGroup) {
53     if (!SC->EndGroup)
54       return 2; // Cracked instruction
55     else
56       return 3; // Expanded/group-alone instruction
57   }
58 
59   return 1; // Normal instruction
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66   return Idx;
67 }
68 
69 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
70 getHazardType(SUnit *m, int Stalls) {
71   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
72 }
73 
74 void SystemZHazardRecognizer::Reset() {
75   CurrGroupSize = 0;
76   clearProcResCounters();
77   GrpCount = 0;
78   LastFPdOpCycleIdx = UINT_MAX;
79   LastEmittedMI = nullptr;
80   DEBUG(CurGroupDbg = "";);
81 }
82 
83 bool
84 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
85   const MCSchedClassDesc *SC = getSchedClass(SU);
86   if (!SC->isValid())
87     return true;
88 
89   // A cracked instruction only fits into schedule if the current
90   // group is empty.
91   if (SC->BeginGroup)
92     return (CurrGroupSize == 0);
93 
94   // Since a full group is handled immediately in EmitInstruction(),
95   // SU should fit into current group. NumSlots should be 1 or 0,
96   // since it is not a cracked or expanded instruction.
97   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
98           "Expected normal instruction to fit in non-full group!");
99 
100   return true;
101 }
102 
103 void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
104   if (CurrGroupSize > 0) {
105     DEBUG(dumpCurrGroup("Completed decode group"));
106     DEBUG(CurGroupDbg = "";);
107 
108     GrpCount++;
109 
110     // Reset counter for next group.
111     CurrGroupSize = 0;
112 
113     // Decrease counters for execution units by one.
114     for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
115       if (ProcResourceCounters[i] > 0)
116         ProcResourceCounters[i]--;
117 
118     // Clear CriticalResourceIdx if it is now below the threshold.
119     if (CriticalResourceIdx != UINT_MAX &&
120         (ProcResourceCounters[CriticalResourceIdx] <=
121          ProcResCostLim))
122       CriticalResourceIdx = UINT_MAX;
123   }
124 
125   DEBUG(if (DbgOutput)
126           dumpProcResourceCounters(););
127 }
128 
129 #ifndef NDEBUG // Debug output
130 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
131   OS << "SU(" << SU->NodeNum << "):";
132   OS << TII->getName(SU->getInstr()->getOpcode());
133 
134   const MCSchedClassDesc *SC = getSchedClass(SU);
135   if (!SC->isValid())
136     return;
137 
138   for (TargetSchedModel::ProcResIter
139          PI = SchedModel->getWriteProcResBegin(SC),
140          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
141     const MCProcResourceDesc &PRD =
142       *SchedModel->getProcResource(PI->ProcResourceIdx);
143     std::string FU(PRD.Name);
144     // trim e.g. Z13_FXaUnit -> FXa
145     FU = FU.substr(FU.find("_") + 1);
146     FU.resize(FU.find("Unit"));
147     OS << "/" << FU;
148 
149     if (PI->Cycles > 1)
150       OS << "(" << PI->Cycles << "cyc)";
151   }
152 
153   if (SC->NumMicroOps > 1)
154     OS << "/" << SC->NumMicroOps << "uops";
155   if (SC->BeginGroup && SC->EndGroup)
156     OS << "/GroupsAlone";
157   else if (SC->BeginGroup)
158     OS << "/BeginsGroup";
159   else if (SC->EndGroup)
160     OS << "/EndsGroup";
161   if (SU->isUnbuffered)
162     OS << "/Unbuffered";
163 }
164 
165 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
166   dbgs() << "+++ " << Msg;
167   dbgs() << ": ";
168 
169   if (CurGroupDbg.empty())
170     dbgs() << " <empty>\n";
171   else {
172     dbgs() << "{ " << CurGroupDbg << " }";
173     dbgs() << " (" << CurrGroupSize << " decoder slot"
174            << (CurrGroupSize > 1 ? "s":"")
175            << ")\n";
176   }
177 }
178 
179 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
180   bool any = false;
181 
182   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
183     if (ProcResourceCounters[i] > 0) {
184       any = true;
185       break;
186     }
187 
188   if (!any)
189     return;
190 
191   dbgs() << "+++ Resource counters:\n";
192   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
193     if (ProcResourceCounters[i] > 0) {
194       dbgs() << "+++ Extra schedule for execution unit "
195              << SchedModel->getProcResource(i)->Name
196              << ": " << ProcResourceCounters[i] << "\n";
197       any = true;
198     }
199 }
200 #endif //NDEBUG
201 
202 void SystemZHazardRecognizer::clearProcResCounters() {
203   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
204   CriticalResourceIdx = UINT_MAX;
205 }
206 
207 static inline bool isBranchRetTrap(MachineInstr *MI) {
208   return (MI->isBranch() || MI->isReturn() ||
209           MI->getOpcode() == SystemZ::CondTrap);
210 }
211 
212 // Update state with SU as the next scheduled unit.
213 void SystemZHazardRecognizer::
214 EmitInstruction(SUnit *SU) {
215   const MCSchedClassDesc *SC = getSchedClass(SU);
216   DEBUG( dumpCurrGroup("Decode group before emission"););
217 
218   // If scheduling an SU that must begin a new decoder group, move on
219   // to next group.
220   if (!fitsIntoCurrentGroup(SU))
221     nextGroup();
222 
223   DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
224          dbgs() << "\n";
225          raw_string_ostream cgd(CurGroupDbg);
226          if (CurGroupDbg.length())
227            cgd << ", ";
228          dumpSU(SU, cgd););
229 
230   LastEmittedMI = SU->getInstr();
231 
232   // After returning from a call, we don't know much about the state.
233   if (SU->isCall) {
234     DEBUG (dbgs() << "+++ Clearing state after call.\n";);
235     clearProcResCounters();
236     LastFPdOpCycleIdx = UINT_MAX;
237     CurrGroupSize += getNumDecoderSlots(SU);
238     assert (CurrGroupSize <= 3);
239     nextGroup();
240     return;
241   }
242 
243   // Increase counter for execution unit(s).
244   for (TargetSchedModel::ProcResIter
245          PI = SchedModel->getWriteProcResBegin(SC),
246          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
247     // Don't handle FPd together with the other resources.
248     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
249       continue;
250     int &CurrCounter =
251       ProcResourceCounters[PI->ProcResourceIdx];
252     CurrCounter += PI->Cycles;
253     // Check if this is now the new critical resource.
254     if ((CurrCounter > ProcResCostLim) &&
255         (CriticalResourceIdx == UINT_MAX ||
256          (PI->ProcResourceIdx != CriticalResourceIdx &&
257           CurrCounter >
258           ProcResourceCounters[CriticalResourceIdx]))) {
259       DEBUG( dbgs() << "+++ New critical resource: "
260              << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
261              << "\n";);
262       CriticalResourceIdx = PI->ProcResourceIdx;
263     }
264   }
265 
266   // Make note of an instruction that uses a blocking resource (FPd).
267   if (SU->isUnbuffered) {
268     LastFPdOpCycleIdx = getCurrCycleIdx();
269     DEBUG (dbgs() << "+++ Last FPd cycle index: "
270            << LastFPdOpCycleIdx << "\n";);
271   }
272 
273   bool GroupEndingBranch =
274     (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr()));
275 
276   // Insert SU into current group by increasing number of slots used
277   // in current group.
278   CurrGroupSize += getNumDecoderSlots(SU);
279   assert (CurrGroupSize <= 3);
280 
281   // Check if current group is now full/ended. If so, move on to next
282   // group to be ready to evaluate more candidates.
283   if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch)
284     nextGroup();
285 }
286 
287 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
288   const MCSchedClassDesc *SC = getSchedClass(SU);
289   if (!SC->isValid())
290     return 0;
291 
292   // If SU begins new group, it can either break a current group early
293   // or fit naturally if current group is empty (negative cost).
294   if (SC->BeginGroup) {
295     if (CurrGroupSize)
296       return 3 - CurrGroupSize;
297     return -1;
298   }
299 
300   // Similarly, a group-ending SU may either fit well (last in group), or
301   // end the group prematurely.
302   if (SC->EndGroup) {
303     unsigned resultingGroupSize =
304       (CurrGroupSize + getNumDecoderSlots(SU));
305     if (resultingGroupSize < 3)
306       return (3 - resultingGroupSize);
307     return -1;
308   }
309 
310   // Most instructions can be placed in any decoder slot.
311   return 0;
312 }
313 
314 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
315   assert (SU->isUnbuffered);
316   // If this is the first FPd op, it should be scheduled high.
317   if (LastFPdOpCycleIdx == UINT_MAX)
318     return true;
319   // If this is not the first PFd op, it should go into the other side
320   // of the processor to use the other FPd unit there. This should
321   // generally happen if two FPd ops are placed with 2 other
322   // instructions between them (modulo 6).
323   if (LastFPdOpCycleIdx > getCurrCycleIdx())
324     return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
325   return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
326 }
327 
328 int SystemZHazardRecognizer::
329 resourcesCost(SUnit *SU) {
330   int Cost = 0;
331 
332   const MCSchedClassDesc *SC = getSchedClass(SU);
333   if (!SC->isValid())
334     return 0;
335 
336   // For a FPd op, either return min or max value as indicated by the
337   // distance to any prior FPd op.
338   if (SU->isUnbuffered)
339     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
340   // For other instructions, give a cost to the use of the critical resource.
341   else if (CriticalResourceIdx != UINT_MAX) {
342     for (TargetSchedModel::ProcResIter
343            PI = SchedModel->getWriteProcResBegin(SC),
344            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
345       if (PI->ProcResourceIdx == CriticalResourceIdx)
346         Cost = PI->Cycles;
347   }
348 
349   return Cost;
350 }
351 
352 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
353                                               bool TakenBranch) {
354   // Make a temporary SUnit.
355   SUnit SU(MI, 0);
356 
357   // Set interesting flags.
358   SU.isCall = MI->isCall();
359 
360   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
361   for (const MCWriteProcResEntry &PRE :
362          make_range(SchedModel->getWriteProcResBegin(SC),
363                     SchedModel->getWriteProcResEnd(SC))) {
364     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
365     case 0:
366       SU.hasReservedResource = true;
367       break;
368     case 1:
369       SU.isUnbuffered = true;
370       break;
371     default:
372       break;
373     }
374   }
375 
376   EmitInstruction(&SU);
377 
378   if (TakenBranch && CurrGroupSize > 0)
379     nextGroup(false /*DbgOutput*/);
380 
381   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
382           "Scheduler: unhandled terminator!");
383 }
384 
385 void SystemZHazardRecognizer::
386 copyState(SystemZHazardRecognizer *Incoming) {
387   // Current decoder group
388   CurrGroupSize = Incoming->CurrGroupSize;
389   DEBUG (CurGroupDbg = Incoming->CurGroupDbg;);
390 
391   // Processor resources
392   ProcResourceCounters = Incoming->ProcResourceCounters;
393   CriticalResourceIdx = Incoming->CriticalResourceIdx;
394 
395   // FPd
396   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
397   GrpCount = Incoming->GrpCount;
398 }
399