//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
// A block that has only one predecessor continues scheduling with the state
// of that predecessor (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
// This is the limit of processor resource usage at which the
// scheduler should try to look for other instructions (not using the
// critical resource).
//
// The hazard recognizer keeps one usage counter per processor resource.
// A resource whose counter rises above this limit when an instruction is
// emitted may become the critical resource, and the critical resource is
// cleared again once its counter is back at or below the limit when a
// decoder group completes. The option is hidden and defaults to 8.
static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
                                   cl::desc("The OOO window for processor "
                                            "resources during scheduling."),
                                   cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
53          "Only cracked instruction can have 2 uops.");
54   assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
55          "Expanded instructions always group alone.");
56   assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
57          "Expanded instructions fill the group(s).");
58 
59   return SC->NumMicroOps;
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66 
67   if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68     if (Idx == 1 || Idx == 2)
69       Idx = 3;
70     else if (Idx == 4 || Idx == 5)
71       Idx = 0;
72   }
73 
74   return Idx;
75 }
76 
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
78 getHazardType(SUnit *m, int Stalls) {
79   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81 
82 void SystemZHazardRecognizer::Reset() {
83   CurrGroupSize = 0;
84   CurrGroupHas4RegOps = false;
85   clearProcResCounters();
86   GrpCount = 0;
87   LastFPdOpCycleIdx = UINT_MAX;
88   LastEmittedMI = nullptr;
89   LLVM_DEBUG(CurGroupDbg = "";);
90 }
91 
92 bool
93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94   const MCSchedClassDesc *SC = getSchedClass(SU);
95   if (!SC->isValid())
96     return true;
97 
98   // A cracked instruction only fits into schedule if the current
99   // group is empty.
100   if (SC->BeginGroup)
101     return (CurrGroupSize == 0);
102 
103   // An instruction with 4 register operands will not fit in last slot.
104   assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105           "Current decoder group is already full!");
106   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
107     return false;
108 
109   // Since a full group is handled immediately in EmitInstruction(),
110   // SU should fit into current group. NumSlots should be 1 or 0,
111   // since it is not a cracked or expanded instruction.
112   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113           "Expected normal instruction to fit in non-full group!");
114 
115   return true;
116 }
117 
118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119   const MachineFunction &MF = *MI->getParent()->getParent();
120   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121   const MCInstrDesc &MID = MI->getDesc();
122   unsigned Count = 0;
123   for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124     const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
125     if (RC == nullptr)
126       continue;
127     if (OpIdx >= MID.getNumDefs() &&
128         MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
129       continue;
130     Count++;
131   }
132   return Count >= 4;
133 }
134 
135 void SystemZHazardRecognizer::nextGroup() {
136   if (CurrGroupSize == 0)
137     return;
138 
139   LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140   LLVM_DEBUG(CurGroupDbg = "";);
141 
142   int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
143   assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
144          "Current decoder group bad.");
145 
146   // Reset counter for next group.
147   CurrGroupSize = 0;
148   CurrGroupHas4RegOps = false;
149 
150   GrpCount += ((unsigned) NumGroups);
151 
152   // Decrease counters for execution units by one.
153   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
154     if (ProcResourceCounters[i] > 0)
155       ProcResourceCounters[i] =
156         ((ProcResourceCounters[i] > NumGroups) ?
157          (ProcResourceCounters[i] - NumGroups) : 0);
158 
159   // Clear CriticalResourceIdx if it is now below the threshold.
160   if (CriticalResourceIdx != UINT_MAX &&
161       (ProcResourceCounters[CriticalResourceIdx] <=
162        ProcResCostLim))
163     CriticalResourceIdx = UINT_MAX;
164 
165   LLVM_DEBUG(dumpState(););
166 }
167 
168 #ifndef NDEBUG // Debug output
169 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
170   OS << "SU(" << SU->NodeNum << "):";
171   OS << TII->getName(SU->getInstr()->getOpcode());
172 
173   const MCSchedClassDesc *SC = getSchedClass(SU);
174   if (!SC->isValid())
175     return;
176 
177   for (TargetSchedModel::ProcResIter
178          PI = SchedModel->getWriteProcResBegin(SC),
179          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
180     const MCProcResourceDesc &PRD =
181       *SchedModel->getProcResource(PI->ProcResourceIdx);
182     std::string FU(PRD.Name);
183     // trim e.g. Z13_FXaUnit -> FXa
184     FU = FU.substr(FU.find("_") + 1);
185     size_t Pos = FU.find("Unit");
186     if (Pos != std::string::npos)
187       FU.resize(Pos);
188     if (FU == "LS") // LSUnit -> LSU
189       FU = "LSU";
190     OS << "/" << FU;
191 
192     if (PI->Cycles > 1)
193       OS << "(" << PI->Cycles << "cyc)";
194   }
195 
196   if (SC->NumMicroOps > 1)
197     OS << "/" << SC->NumMicroOps << "uops";
198   if (SC->BeginGroup && SC->EndGroup)
199     OS << "/GroupsAlone";
200   else if (SC->BeginGroup)
201     OS << "/BeginsGroup";
202   else if (SC->EndGroup)
203     OS << "/EndsGroup";
204   if (SU->isUnbuffered)
205     OS << "/Unbuffered";
206   if (has4RegOps(SU->getInstr()))
207     OS << "/4RegOps";
208 }
209 
210 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
211   dbgs() << "++ " << Msg;
212   dbgs() << ": ";
213 
214   if (CurGroupDbg.empty())
215     dbgs() << " <empty>\n";
216   else {
217     dbgs() << "{ " << CurGroupDbg << " }";
218     dbgs() << " (" << CurrGroupSize << " decoder slot"
219            << (CurrGroupSize > 1 ? "s":"")
220            << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
221            << ")\n";
222   }
223 }
224 
225 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
226   bool any = false;
227 
228   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
229     if (ProcResourceCounters[i] > 0) {
230       any = true;
231       break;
232     }
233 
234   if (!any)
235     return;
236 
237   dbgs() << "++ | Resource counters: ";
238   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
239     if (ProcResourceCounters[i] > 0)
240       dbgs() << SchedModel->getProcResource(i)->Name
241              << ":" << ProcResourceCounters[i] << " ";
242   dbgs() << "\n";
243 
244   if (CriticalResourceIdx != UINT_MAX)
245     dbgs() << "++ | Critical resource: "
246            << SchedModel->getProcResource(CriticalResourceIdx)->Name
247            << "\n";
248 }
249 
250 void SystemZHazardRecognizer::dumpState() const {
251   dumpCurrGroup("| Current decoder group");
252   dbgs() << "++ | Current cycle index: "
253          << getCurrCycleIdx() << "\n";
254   dumpProcResourceCounters();
255   if (LastFPdOpCycleIdx != UINT_MAX)
256     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
257 }
258 
259 #endif //NDEBUG
260 
261 void SystemZHazardRecognizer::clearProcResCounters() {
262   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
263   CriticalResourceIdx = UINT_MAX;
264 }
265 
266 static inline bool isBranchRetTrap(MachineInstr *MI) {
267   return (MI->isBranch() || MI->isReturn() ||
268           MI->getOpcode() == SystemZ::CondTrap);
269 }
270 
271 // Update state with SU as the next scheduled unit.
272 void SystemZHazardRecognizer::
273 EmitInstruction(SUnit *SU) {
274   const MCSchedClassDesc *SC = getSchedClass(SU);
275   LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
276              dbgs() << "\n";);
277   LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
278 
279   // If scheduling an SU that must begin a new decoder group, move on
280   // to next group.
281   if (!fitsIntoCurrentGroup(SU))
282     nextGroup();
283 
284   LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
285              if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
286 
287   LastEmittedMI = SU->getInstr();
288 
289   // After returning from a call, we don't know much about the state.
290   if (SU->isCall) {
291     LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
292     Reset();
293     LastEmittedMI = SU->getInstr();
294     return;
295   }
296 
297   // Increase counter for execution unit(s).
298   for (TargetSchedModel::ProcResIter
299          PI = SchedModel->getWriteProcResBegin(SC),
300          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
301     // Don't handle FPd together with the other resources.
302     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
303       continue;
304     int &CurrCounter =
305       ProcResourceCounters[PI->ProcResourceIdx];
306     CurrCounter += PI->Cycles;
307     // Check if this is now the new critical resource.
308     if ((CurrCounter > ProcResCostLim) &&
309         (CriticalResourceIdx == UINT_MAX ||
310          (PI->ProcResourceIdx != CriticalResourceIdx &&
311           CurrCounter >
312           ProcResourceCounters[CriticalResourceIdx]))) {
313       LLVM_DEBUG(
314           dbgs() << "++ New critical resource: "
315                  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
316                  << "\n";);
317       CriticalResourceIdx = PI->ProcResourceIdx;
318     }
319   }
320 
321   // Make note of an instruction that uses a blocking resource (FPd).
322   if (SU->isUnbuffered) {
323     LastFPdOpCycleIdx = getCurrCycleIdx(SU);
324     LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
325                       << "\n";);
326   }
327 
328   // Insert SU into current group by increasing number of slots used
329   // in current group.
330   CurrGroupSize += getNumDecoderSlots(SU);
331   CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
332   unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
333   assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
334          && "SU does not fit into decoder group!");
335 
336   // Check if current group is now full/ended. If so, move on to next
337   // group to be ready to evaluate more candidates.
338   if (CurrGroupSize >= GroupLim || SC->EndGroup)
339     nextGroup();
340 }
341 
342 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
343   const MCSchedClassDesc *SC = getSchedClass(SU);
344   if (!SC->isValid())
345     return 0;
346 
347   // If SU begins new group, it can either break a current group early
348   // or fit naturally if current group is empty (negative cost).
349   if (SC->BeginGroup) {
350     if (CurrGroupSize)
351       return 3 - CurrGroupSize;
352     return -1;
353   }
354 
355   // Similarly, a group-ending SU may either fit well (last in group), or
356   // end the group prematurely.
357   if (SC->EndGroup) {
358     unsigned resultingGroupSize =
359       (CurrGroupSize + getNumDecoderSlots(SU));
360     if (resultingGroupSize < 3)
361       return (3 - resultingGroupSize);
362     return -1;
363   }
364 
365   // An instruction with 4 register operands will not fit in last slot.
366   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
367     return 1;
368 
369   // Most instructions can be placed in any decoder slot.
370   return 0;
371 }
372 
373 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
374   assert (SU->isUnbuffered);
375   // If this is the first FPd op, it should be scheduled high.
376   if (LastFPdOpCycleIdx == UINT_MAX)
377     return true;
378   // If this is not the first PFd op, it should go into the other side
379   // of the processor to use the other FPd unit there. This should
380   // generally happen if two FPd ops are placed with 2 other
381   // instructions between them (modulo 6).
382   unsigned SUCycleIdx = getCurrCycleIdx(SU);
383   if (LastFPdOpCycleIdx > SUCycleIdx)
384     return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
385   return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
386 }
387 
388 int SystemZHazardRecognizer::
389 resourcesCost(SUnit *SU) {
390   int Cost = 0;
391 
392   const MCSchedClassDesc *SC = getSchedClass(SU);
393   if (!SC->isValid())
394     return 0;
395 
396   // For a FPd op, either return min or max value as indicated by the
397   // distance to any prior FPd op.
398   if (SU->isUnbuffered)
399     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
400   // For other instructions, give a cost to the use of the critical resource.
401   else if (CriticalResourceIdx != UINT_MAX) {
402     for (TargetSchedModel::ProcResIter
403            PI = SchedModel->getWriteProcResBegin(SC),
404            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
405       if (PI->ProcResourceIdx == CriticalResourceIdx)
406         Cost = PI->Cycles;
407   }
408 
409   return Cost;
410 }
411 
412 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
413                                               bool TakenBranch) {
414   // Make a temporary SUnit.
415   SUnit SU(MI, 0);
416 
417   // Set interesting flags.
418   SU.isCall = MI->isCall();
419 
420   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
421   for (const MCWriteProcResEntry &PRE :
422          make_range(SchedModel->getWriteProcResBegin(SC),
423                     SchedModel->getWriteProcResEnd(SC))) {
424     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
425     case 0:
426       SU.hasReservedResource = true;
427       break;
428     case 1:
429       SU.isUnbuffered = true;
430       break;
431     default:
432       break;
433     }
434   }
435 
436   unsigned GroupSizeBeforeEmit = CurrGroupSize;
437   EmitInstruction(&SU);
438 
439   if (!TakenBranch && isBranchRetTrap(MI)) {
440     // NT Branch on second slot ends group.
441     if (GroupSizeBeforeEmit == 1)
442       nextGroup();
443   }
444 
445   if (TakenBranch && CurrGroupSize > 0)
446     nextGroup();
447 
448   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
449           "Scheduler: unhandled terminator!");
450 }
451 
452 void SystemZHazardRecognizer::
453 copyState(SystemZHazardRecognizer *Incoming) {
454   // Current decoder group
455   CurrGroupSize = Incoming->CurrGroupSize;
456   LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
457 
458   // Processor resources
459   ProcResourceCounters = Incoming->ProcResourceCounters;
460   CriticalResourceIdx = Incoming->CriticalResourceIdx;
461 
462   // FPd
463   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
464   GrpCount = Incoming->GrpCount;
465 }
466