//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
// A block that has only one predecessor continues scheduling with the state
// of that predecessor (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                    cl::desc("The OOO window for processor "
43                                             "resources during scheduling."),
44                                    cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
53          "Only cracked instruction can have 2 uops.");
54   assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
55          "Expanded instructions always group alone.");
56   assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
57          "Expanded instructions fill the group(s).");
58 
59   return SC->NumMicroOps;
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66 
67   if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68     if (Idx == 1 || Idx == 2)
69       Idx = 3;
70     else if (Idx == 4 || Idx == 5)
71       Idx = 0;
72   }
73 
74   return Idx;
75 }
76 
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
78 getHazardType(SUnit *m, int Stalls) {
79   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81 
82 void SystemZHazardRecognizer::Reset() {
83   CurrGroupSize = 0;
84   CurrGroupHas4RegOps = false;
85   clearProcResCounters();
86   GrpCount = 0;
87   LastFPdOpCycleIdx = UINT_MAX;
88   LastEmittedMI = nullptr;
89   LLVM_DEBUG(CurGroupDbg = "";);
90 }
91 
92 bool
93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94   const MCSchedClassDesc *SC = getSchedClass(SU);
95   if (!SC->isValid())
96     return true;
97 
98   // A cracked instruction only fits into schedule if the current
99   // group is empty.
100   if (SC->BeginGroup)
101     return (CurrGroupSize == 0);
102 
103   // An instruction with 4 register operands will not fit in last slot.
104   assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105           "Current decoder group is already full!");
106   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
107     return false;
108 
109   // Since a full group is handled immediately in EmitInstruction(),
110   // SU should fit into current group. NumSlots should be 1 or 0,
111   // since it is not a cracked or expanded instruction.
112   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113           "Expected normal instruction to fit in non-full group!");
114 
115   return true;
116 }
117 
118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119   const MachineFunction &MF = *MI->getParent()->getParent();
120   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121   const MCInstrDesc &MID = MI->getDesc();
122   unsigned Count = 0;
123   for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124     const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
125     if (RC == nullptr)
126       continue;
127     if (OpIdx >= MID.getNumDefs() &&
128         MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
129       continue;
130     Count++;
131   }
132   return Count >= 4;
133 }
134 
135 void SystemZHazardRecognizer::nextGroup() {
136   if (CurrGroupSize == 0)
137     return;
138 
139   LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140   LLVM_DEBUG(CurGroupDbg = "";);
141 
142   int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
143   assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
144          "Current decoder group bad.");
145 
146   // Reset counter for next group.
147   CurrGroupSize = 0;
148   CurrGroupHas4RegOps = false;
149 
150   GrpCount += ((unsigned) NumGroups);
151 
152   // Decrease counters for execution units.
153   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
154     ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
155                                    ? (ProcResourceCounters[i] - NumGroups)
156                                    : 0);
157 
158   // Clear CriticalResourceIdx if it is now below the threshold.
159   if (CriticalResourceIdx != UINT_MAX &&
160       (ProcResourceCounters[CriticalResourceIdx] <=
161        ProcResCostLim))
162     CriticalResourceIdx = UINT_MAX;
163 
164   LLVM_DEBUG(dumpState(););
165 }
166 
167 #ifndef NDEBUG // Debug output
168 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
169   OS << "SU(" << SU->NodeNum << "):";
170   OS << TII->getName(SU->getInstr()->getOpcode());
171 
172   const MCSchedClassDesc *SC = getSchedClass(SU);
173   if (!SC->isValid())
174     return;
175 
176   for (TargetSchedModel::ProcResIter
177          PI = SchedModel->getWriteProcResBegin(SC),
178          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
179     const MCProcResourceDesc &PRD =
180       *SchedModel->getProcResource(PI->ProcResourceIdx);
181     std::string FU(PRD.Name);
182     // trim e.g. Z13_FXaUnit -> FXa
183     FU = FU.substr(FU.find("_") + 1);
184     size_t Pos = FU.find("Unit");
185     if (Pos != std::string::npos)
186       FU.resize(Pos);
187     if (FU == "LS") // LSUnit -> LSU
188       FU = "LSU";
189     OS << "/" << FU;
190 
191     if (PI->Cycles > 1)
192       OS << "(" << PI->Cycles << "cyc)";
193   }
194 
195   if (SC->NumMicroOps > 1)
196     OS << "/" << SC->NumMicroOps << "uops";
197   if (SC->BeginGroup && SC->EndGroup)
198     OS << "/GroupsAlone";
199   else if (SC->BeginGroup)
200     OS << "/BeginsGroup";
201   else if (SC->EndGroup)
202     OS << "/EndsGroup";
203   if (SU->isUnbuffered)
204     OS << "/Unbuffered";
205   if (has4RegOps(SU->getInstr()))
206     OS << "/4RegOps";
207 }
208 
209 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
210   dbgs() << "++ " << Msg;
211   dbgs() << ": ";
212 
213   if (CurGroupDbg.empty())
214     dbgs() << " <empty>\n";
215   else {
216     dbgs() << "{ " << CurGroupDbg << " }";
217     dbgs() << " (" << CurrGroupSize << " decoder slot"
218            << (CurrGroupSize > 1 ? "s":"")
219            << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
220            << ")\n";
221   }
222 }
223 
224 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
225   bool any = false;
226 
227   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
228     if (ProcResourceCounters[i] > 0) {
229       any = true;
230       break;
231     }
232 
233   if (!any)
234     return;
235 
236   dbgs() << "++ | Resource counters: ";
237   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
238     if (ProcResourceCounters[i] > 0)
239       dbgs() << SchedModel->getProcResource(i)->Name
240              << ":" << ProcResourceCounters[i] << " ";
241   dbgs() << "\n";
242 
243   if (CriticalResourceIdx != UINT_MAX)
244     dbgs() << "++ | Critical resource: "
245            << SchedModel->getProcResource(CriticalResourceIdx)->Name
246            << "\n";
247 }
248 
249 void SystemZHazardRecognizer::dumpState() const {
250   dumpCurrGroup("| Current decoder group");
251   dbgs() << "++ | Current cycle index: "
252          << getCurrCycleIdx() << "\n";
253   dumpProcResourceCounters();
254   if (LastFPdOpCycleIdx != UINT_MAX)
255     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
256 }
257 
258 #endif //NDEBUG
259 
260 void SystemZHazardRecognizer::clearProcResCounters() {
261   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
262   CriticalResourceIdx = UINT_MAX;
263 }
264 
265 static inline bool isBranchRetTrap(MachineInstr *MI) {
266   return (MI->isBranch() || MI->isReturn() ||
267           MI->getOpcode() == SystemZ::CondTrap);
268 }
269 
270 // Update state with SU as the next scheduled unit.
271 void SystemZHazardRecognizer::
272 EmitInstruction(SUnit *SU) {
273   const MCSchedClassDesc *SC = getSchedClass(SU);
274   LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
275              dbgs() << "\n";);
276   LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
277 
278   // If scheduling an SU that must begin a new decoder group, move on
279   // to next group.
280   if (!fitsIntoCurrentGroup(SU))
281     nextGroup();
282 
283   LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
284              if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
285 
286   LastEmittedMI = SU->getInstr();
287 
288   // After returning from a call, we don't know much about the state.
289   if (SU->isCall) {
290     LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
291     Reset();
292     LastEmittedMI = SU->getInstr();
293     return;
294   }
295 
296   // Increase counter for execution unit(s).
297   for (TargetSchedModel::ProcResIter
298          PI = SchedModel->getWriteProcResBegin(SC),
299          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
300     // Don't handle FPd together with the other resources.
301     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
302       continue;
303     int &CurrCounter =
304       ProcResourceCounters[PI->ProcResourceIdx];
305     CurrCounter += PI->Cycles;
306     // Check if this is now the new critical resource.
307     if ((CurrCounter > ProcResCostLim) &&
308         (CriticalResourceIdx == UINT_MAX ||
309          (PI->ProcResourceIdx != CriticalResourceIdx &&
310           CurrCounter >
311           ProcResourceCounters[CriticalResourceIdx]))) {
312       LLVM_DEBUG(
313           dbgs() << "++ New critical resource: "
314                  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
315                  << "\n";);
316       CriticalResourceIdx = PI->ProcResourceIdx;
317     }
318   }
319 
320   // Make note of an instruction that uses a blocking resource (FPd).
321   if (SU->isUnbuffered) {
322     LastFPdOpCycleIdx = getCurrCycleIdx(SU);
323     LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
324                       << "\n";);
325   }
326 
327   // Insert SU into current group by increasing number of slots used
328   // in current group.
329   CurrGroupSize += getNumDecoderSlots(SU);
330   CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
331   unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
332   assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
333          && "SU does not fit into decoder group!");
334 
335   // Check if current group is now full/ended. If so, move on to next
336   // group to be ready to evaluate more candidates.
337   if (CurrGroupSize >= GroupLim || SC->EndGroup)
338     nextGroup();
339 }
340 
341 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
342   const MCSchedClassDesc *SC = getSchedClass(SU);
343   if (!SC->isValid())
344     return 0;
345 
346   // If SU begins new group, it can either break a current group early
347   // or fit naturally if current group is empty (negative cost).
348   if (SC->BeginGroup) {
349     if (CurrGroupSize)
350       return 3 - CurrGroupSize;
351     return -1;
352   }
353 
354   // Similarly, a group-ending SU may either fit well (last in group), or
355   // end the group prematurely.
356   if (SC->EndGroup) {
357     unsigned resultingGroupSize =
358       (CurrGroupSize + getNumDecoderSlots(SU));
359     if (resultingGroupSize < 3)
360       return (3 - resultingGroupSize);
361     return -1;
362   }
363 
364   // An instruction with 4 register operands will not fit in last slot.
365   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
366     return 1;
367 
368   // Most instructions can be placed in any decoder slot.
369   return 0;
370 }
371 
372 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
373   assert (SU->isUnbuffered);
374   // If this is the first FPd op, it should be scheduled high.
375   if (LastFPdOpCycleIdx == UINT_MAX)
376     return true;
377   // If this is not the first PFd op, it should go into the other side
378   // of the processor to use the other FPd unit there. This should
379   // generally happen if two FPd ops are placed with 2 other
380   // instructions between them (modulo 6).
381   unsigned SUCycleIdx = getCurrCycleIdx(SU);
382   if (LastFPdOpCycleIdx > SUCycleIdx)
383     return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
384   return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
385 }
386 
387 int SystemZHazardRecognizer::
388 resourcesCost(SUnit *SU) {
389   int Cost = 0;
390 
391   const MCSchedClassDesc *SC = getSchedClass(SU);
392   if (!SC->isValid())
393     return 0;
394 
395   // For a FPd op, either return min or max value as indicated by the
396   // distance to any prior FPd op.
397   if (SU->isUnbuffered)
398     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
399   // For other instructions, give a cost to the use of the critical resource.
400   else if (CriticalResourceIdx != UINT_MAX) {
401     for (TargetSchedModel::ProcResIter
402            PI = SchedModel->getWriteProcResBegin(SC),
403            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
404       if (PI->ProcResourceIdx == CriticalResourceIdx)
405         Cost = PI->Cycles;
406   }
407 
408   return Cost;
409 }
410 
411 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
412                                               bool TakenBranch) {
413   // Make a temporary SUnit.
414   SUnit SU(MI, 0);
415 
416   // Set interesting flags.
417   SU.isCall = MI->isCall();
418 
419   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
420   for (const MCWriteProcResEntry &PRE :
421          make_range(SchedModel->getWriteProcResBegin(SC),
422                     SchedModel->getWriteProcResEnd(SC))) {
423     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
424     case 0:
425       SU.hasReservedResource = true;
426       break;
427     case 1:
428       SU.isUnbuffered = true;
429       break;
430     default:
431       break;
432     }
433   }
434 
435   unsigned GroupSizeBeforeEmit = CurrGroupSize;
436   EmitInstruction(&SU);
437 
438   if (!TakenBranch && isBranchRetTrap(MI)) {
439     // NT Branch on second slot ends group.
440     if (GroupSizeBeforeEmit == 1)
441       nextGroup();
442   }
443 
444   if (TakenBranch && CurrGroupSize > 0)
445     nextGroup();
446 
447   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
448           "Scheduler: unhandled terminator!");
449 }
450 
451 void SystemZHazardRecognizer::
452 copyState(SystemZHazardRecognizer *Incoming) {
453   // Current decoder group
454   CurrGroupSize = Incoming->CurrGroupSize;
455   LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
456 
457   // Processor resources
458   ProcResourceCounters = Incoming->ProcResourceCounters;
459   CriticalResourceIdx = Incoming->CriticalResourceIdx;
460 
461   // FPd
462   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
463   GrpCount = Incoming->GrpCount;
464 }
465