1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
26 // A block that has only one predecessor continues scheduling with the
27 // state of that predecessor (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                    cl::desc("The OOO window for processor "
43                                             "resources during scheduling."),
44                                    cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   if (SC->BeginGroup) {
53     if (!SC->EndGroup)
54       return 2; // Cracked instruction
55     else
56       return 3; // Expanded/group-alone instruction
57   }
58 
59   return 1; // Normal instruction
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66 
67   if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68     if (Idx == 1 || Idx == 2)
69       Idx = 3;
70     else if (Idx == 4 || Idx == 5)
71       Idx = 0;
72   }
73 
74   return Idx;
75 }
76 
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
78 getHazardType(SUnit *m, int Stalls) {
79   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81 
82 void SystemZHazardRecognizer::Reset() {
83   CurrGroupSize = 0;
84   clearProcResCounters();
85   GrpCount = 0;
86   LastFPdOpCycleIdx = UINT_MAX;
87   LastEmittedMI = nullptr;
88   LLVM_DEBUG(CurGroupDbg = "";);
89 }
90 
91 bool
92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93   const MCSchedClassDesc *SC = getSchedClass(SU);
94   if (!SC->isValid())
95     return true;
96 
97   // A cracked instruction only fits into schedule if the current
98   // group is empty.
99   if (SC->BeginGroup)
100     return (CurrGroupSize == 0);
101 
102   // Since a full group is handled immediately in EmitInstruction(),
103   // SU should fit into current group. NumSlots should be 1 or 0,
104   // since it is not a cracked or expanded instruction.
105   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
106           "Expected normal instruction to fit in non-full group!");
107 
108   return true;
109 }
110 
111 void SystemZHazardRecognizer::nextGroup() {
112   if (CurrGroupSize == 0)
113     return;
114 
115   LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
116   LLVM_DEBUG(CurGroupDbg = "";);
117 
118   GrpCount++;
119 
120   // Reset counter for next group.
121   CurrGroupSize = 0;
122 
123   // Decrease counters for execution units by one.
124   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
125     if (ProcResourceCounters[i] > 0)
126       ProcResourceCounters[i]--;
127 
128   // Clear CriticalResourceIdx if it is now below the threshold.
129   if (CriticalResourceIdx != UINT_MAX &&
130       (ProcResourceCounters[CriticalResourceIdx] <=
131        ProcResCostLim))
132     CriticalResourceIdx = UINT_MAX;
133 
134   LLVM_DEBUG(dumpState(););
135 }
136 
137 #ifndef NDEBUG // Debug output
138 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
139   OS << "SU(" << SU->NodeNum << "):";
140   OS << TII->getName(SU->getInstr()->getOpcode());
141 
142   const MCSchedClassDesc *SC = getSchedClass(SU);
143   if (!SC->isValid())
144     return;
145 
146   for (TargetSchedModel::ProcResIter
147          PI = SchedModel->getWriteProcResBegin(SC),
148          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
149     const MCProcResourceDesc &PRD =
150       *SchedModel->getProcResource(PI->ProcResourceIdx);
151     std::string FU(PRD.Name);
152     // trim e.g. Z13_FXaUnit -> FXa
153     FU = FU.substr(FU.find("_") + 1);
154     size_t Pos = FU.find("Unit");
155     if (Pos != std::string::npos)
156       FU.resize(Pos);
157     if (FU == "LS") // LSUnit -> LSU
158       FU = "LSU";
159     OS << "/" << FU;
160 
161     if (PI->Cycles > 1)
162       OS << "(" << PI->Cycles << "cyc)";
163   }
164 
165   if (SC->NumMicroOps > 1)
166     OS << "/" << SC->NumMicroOps << "uops";
167   if (SC->BeginGroup && SC->EndGroup)
168     OS << "/GroupsAlone";
169   else if (SC->BeginGroup)
170     OS << "/BeginsGroup";
171   else if (SC->EndGroup)
172     OS << "/EndsGroup";
173   if (SU->isUnbuffered)
174     OS << "/Unbuffered";
175 }
176 
177 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
178   dbgs() << "++ " << Msg;
179   dbgs() << ": ";
180 
181   if (CurGroupDbg.empty())
182     dbgs() << " <empty>\n";
183   else {
184     dbgs() << "{ " << CurGroupDbg << " }";
185     dbgs() << " (" << CurrGroupSize << " decoder slot"
186            << (CurrGroupSize > 1 ? "s":"")
187            << ")\n";
188   }
189 }
190 
191 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
192   bool any = false;
193 
194   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
195     if (ProcResourceCounters[i] > 0) {
196       any = true;
197       break;
198     }
199 
200   if (!any)
201     return;
202 
203   dbgs() << "++ | Resource counters: ";
204   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
205     if (ProcResourceCounters[i] > 0)
206       dbgs() << SchedModel->getProcResource(i)->Name
207              << ":" << ProcResourceCounters[i] << " ";
208   dbgs() << "\n";
209 
210   if (CriticalResourceIdx != UINT_MAX)
211     dbgs() << "++ | Critical resource: "
212            << SchedModel->getProcResource(CriticalResourceIdx)->Name
213            << "\n";
214 }
215 
216 void SystemZHazardRecognizer::dumpState() const {
217   dumpCurrGroup("| Current decoder group");
218   dbgs() << "++ | Current cycle index: "
219          << getCurrCycleIdx() << "\n";
220   dumpProcResourceCounters();
221   if (LastFPdOpCycleIdx != UINT_MAX)
222     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
223 }
224 
225 #endif //NDEBUG
226 
227 void SystemZHazardRecognizer::clearProcResCounters() {
228   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
229   CriticalResourceIdx = UINT_MAX;
230 }
231 
232 static inline bool isBranchRetTrap(MachineInstr *MI) {
233   return (MI->isBranch() || MI->isReturn() ||
234           MI->getOpcode() == SystemZ::CondTrap);
235 }
236 
237 // Update state with SU as the next scheduled unit.
238 void SystemZHazardRecognizer::
239 EmitInstruction(SUnit *SU) {
240   const MCSchedClassDesc *SC = getSchedClass(SU);
241   LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
242              dbgs() << "\n";);
243   LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
244 
245   // If scheduling an SU that must begin a new decoder group, move on
246   // to next group.
247   if (!fitsIntoCurrentGroup(SU))
248     nextGroup();
249 
250   LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
251              if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
252 
253   LastEmittedMI = SU->getInstr();
254 
255   // After returning from a call, we don't know much about the state.
256   if (SU->isCall) {
257     LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
258     Reset();
259     LastEmittedMI = SU->getInstr();
260     return;
261   }
262 
263   // Increase counter for execution unit(s).
264   for (TargetSchedModel::ProcResIter
265          PI = SchedModel->getWriteProcResBegin(SC),
266          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
267     // Don't handle FPd together with the other resources.
268     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
269       continue;
270     int &CurrCounter =
271       ProcResourceCounters[PI->ProcResourceIdx];
272     CurrCounter += PI->Cycles;
273     // Check if this is now the new critical resource.
274     if ((CurrCounter > ProcResCostLim) &&
275         (CriticalResourceIdx == UINT_MAX ||
276          (PI->ProcResourceIdx != CriticalResourceIdx &&
277           CurrCounter >
278           ProcResourceCounters[CriticalResourceIdx]))) {
279       LLVM_DEBUG(
280           dbgs() << "++ New critical resource: "
281                  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
282                  << "\n";);
283       CriticalResourceIdx = PI->ProcResourceIdx;
284     }
285   }
286 
287   // Make note of an instruction that uses a blocking resource (FPd).
288   if (SU->isUnbuffered) {
289     LastFPdOpCycleIdx = getCurrCycleIdx(SU);
290     LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
291                       << "\n";);
292   }
293 
294   // Insert SU into current group by increasing number of slots used
295   // in current group.
296   CurrGroupSize += getNumDecoderSlots(SU);
297   assert (CurrGroupSize <= 3);
298 
299   // Check if current group is now full/ended. If so, move on to next
300   // group to be ready to evaluate more candidates.
301   if (CurrGroupSize == 3 || SC->EndGroup)
302     nextGroup();
303 }
304 
305 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
306   const MCSchedClassDesc *SC = getSchedClass(SU);
307   if (!SC->isValid())
308     return 0;
309 
310   // If SU begins new group, it can either break a current group early
311   // or fit naturally if current group is empty (negative cost).
312   if (SC->BeginGroup) {
313     if (CurrGroupSize)
314       return 3 - CurrGroupSize;
315     return -1;
316   }
317 
318   // Similarly, a group-ending SU may either fit well (last in group), or
319   // end the group prematurely.
320   if (SC->EndGroup) {
321     unsigned resultingGroupSize =
322       (CurrGroupSize + getNumDecoderSlots(SU));
323     if (resultingGroupSize < 3)
324       return (3 - resultingGroupSize);
325     return -1;
326   }
327 
328   // Most instructions can be placed in any decoder slot.
329   return 0;
330 }
331 
332 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
333   assert (SU->isUnbuffered);
334   // If this is the first FPd op, it should be scheduled high.
335   if (LastFPdOpCycleIdx == UINT_MAX)
336     return true;
337   // If this is not the first PFd op, it should go into the other side
338   // of the processor to use the other FPd unit there. This should
339   // generally happen if two FPd ops are placed with 2 other
340   // instructions between them (modulo 6).
341   unsigned SUCycleIdx = getCurrCycleIdx(SU);
342   if (LastFPdOpCycleIdx > SUCycleIdx)
343     return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
344   return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
345 }
346 
347 int SystemZHazardRecognizer::
348 resourcesCost(SUnit *SU) {
349   int Cost = 0;
350 
351   const MCSchedClassDesc *SC = getSchedClass(SU);
352   if (!SC->isValid())
353     return 0;
354 
355   // For a FPd op, either return min or max value as indicated by the
356   // distance to any prior FPd op.
357   if (SU->isUnbuffered)
358     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
359   // For other instructions, give a cost to the use of the critical resource.
360   else if (CriticalResourceIdx != UINT_MAX) {
361     for (TargetSchedModel::ProcResIter
362            PI = SchedModel->getWriteProcResBegin(SC),
363            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
364       if (PI->ProcResourceIdx == CriticalResourceIdx)
365         Cost = PI->Cycles;
366   }
367 
368   return Cost;
369 }
370 
371 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
372                                               bool TakenBranch) {
373   // Make a temporary SUnit.
374   SUnit SU(MI, 0);
375 
376   // Set interesting flags.
377   SU.isCall = MI->isCall();
378 
379   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
380   for (const MCWriteProcResEntry &PRE :
381          make_range(SchedModel->getWriteProcResBegin(SC),
382                     SchedModel->getWriteProcResEnd(SC))) {
383     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
384     case 0:
385       SU.hasReservedResource = true;
386       break;
387     case 1:
388       SU.isUnbuffered = true;
389       break;
390     default:
391       break;
392     }
393   }
394 
395   unsigned GroupSizeBeforeEmit = CurrGroupSize;
396   EmitInstruction(&SU);
397 
398   if (!TakenBranch && isBranchRetTrap(MI)) {
399     // NT Branch on second slot ends group.
400     if (GroupSizeBeforeEmit == 1)
401       nextGroup();
402   }
403 
404   if (TakenBranch && CurrGroupSize > 0)
405     nextGroup();
406 
407   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
408           "Scheduler: unhandled terminator!");
409 }
410 
411 void SystemZHazardRecognizer::
412 copyState(SystemZHazardRecognizer *Incoming) {
413   // Current decoder group
414   CurrGroupSize = Incoming->CurrGroupSize;
415   LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
416 
417   // Processor resources
418   ProcResourceCounters = Incoming->ProcResourceCounters;
419   CriticalResourceIdx = Incoming->CriticalResourceIdx;
420 
421   // FPd
422   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
423   GrpCount = Incoming->GrpCount;
424 }
425