//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
// A block that has only one predecessor continues scheduling with that
// predecessor's state (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "machine-scheduler"
37
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42 cl::desc("The OOO window for processor "
43 "resources during scheduling."),
44 cl::init(8));
45
46 unsigned SystemZHazardRecognizer::
getNumDecoderSlots(SUnit * SU) const47 getNumDecoderSlots(SUnit *SU) const {
48 const MCSchedClassDesc *SC = getSchedClass(SU);
49 if (!SC->isValid())
50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51
52 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
53 "Only cracked instruction can have 2 uops.");
54 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
55 "Expanded instructions always group alone.");
56 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
57 "Expanded instructions fill the group(s).");
58
59 return SC->NumMicroOps;
60 }
61
getCurrCycleIdx(SUnit * SU) const62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63 unsigned Idx = CurrGroupSize;
64 if (GrpCount % 2)
65 Idx += 3;
66
67 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68 if (Idx == 1 || Idx == 2)
69 Idx = 3;
70 else if (Idx == 4 || Idx == 5)
71 Idx = 0;
72 }
73
74 return Idx;
75 }
76
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
getHazardType(SUnit * m,int Stalls)78 getHazardType(SUnit *m, int Stalls) {
79 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81
Reset()82 void SystemZHazardRecognizer::Reset() {
83 CurrGroupSize = 0;
84 CurrGroupHas4RegOps = false;
85 clearProcResCounters();
86 GrpCount = 0;
87 LastFPdOpCycleIdx = UINT_MAX;
88 LastEmittedMI = nullptr;
89 LLVM_DEBUG(CurGroupDbg = "";);
90 }
91
92 bool
fitsIntoCurrentGroup(SUnit * SU) const93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94 const MCSchedClassDesc *SC = getSchedClass(SU);
95 if (!SC->isValid())
96 return true;
97
98 // A cracked instruction only fits into schedule if the current
99 // group is empty.
100 if (SC->BeginGroup)
101 return (CurrGroupSize == 0);
102
103 // An instruction with 4 register operands will not fit in last slot.
104 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105 "Current decoder group is already full!");
106 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
107 return false;
108
109 // Since a full group is handled immediately in EmitInstruction(),
110 // SU should fit into current group. NumSlots should be 1 or 0,
111 // since it is not a cracked or expanded instruction.
112 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113 "Expected normal instruction to fit in non-full group!");
114
115 return true;
116 }
117
has4RegOps(const MachineInstr * MI) const118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119 const MachineFunction &MF = *MI->getParent()->getParent();
120 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121 const MCInstrDesc &MID = MI->getDesc();
122 unsigned Count = 0;
123 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
125 if (RC == nullptr)
126 continue;
127 if (OpIdx >= MID.getNumDefs() &&
128 MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
129 continue;
130 Count++;
131 }
132 return Count >= 4;
133 }
134
nextGroup()135 void SystemZHazardRecognizer::nextGroup() {
136 if (CurrGroupSize == 0)
137 return;
138
139 LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140 LLVM_DEBUG(CurGroupDbg = "";);
141
142 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
143 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
144 "Current decoder group bad.");
145
146 // Reset counter for next group.
147 CurrGroupSize = 0;
148 CurrGroupHas4RegOps = false;
149
150 GrpCount += ((unsigned) NumGroups);
151
152 // Decrease counters for execution units.
153 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
154 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
155 ? (ProcResourceCounters[i] - NumGroups)
156 : 0);
157
158 // Clear CriticalResourceIdx if it is now below the threshold.
159 if (CriticalResourceIdx != UINT_MAX &&
160 (ProcResourceCounters[CriticalResourceIdx] <=
161 ProcResCostLim))
162 CriticalResourceIdx = UINT_MAX;
163
164 LLVM_DEBUG(dumpState(););
165 }
166
167 #ifndef NDEBUG // Debug output
dumpSU(SUnit * SU,raw_ostream & OS) const168 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
169 OS << "SU(" << SU->NodeNum << "):";
170 OS << TII->getName(SU->getInstr()->getOpcode());
171
172 const MCSchedClassDesc *SC = getSchedClass(SU);
173 if (!SC->isValid())
174 return;
175
176 for (TargetSchedModel::ProcResIter
177 PI = SchedModel->getWriteProcResBegin(SC),
178 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
179 const MCProcResourceDesc &PRD =
180 *SchedModel->getProcResource(PI->ProcResourceIdx);
181 std::string FU(PRD.Name);
182 // trim e.g. Z13_FXaUnit -> FXa
183 FU = FU.substr(FU.find("_") + 1);
184 size_t Pos = FU.find("Unit");
185 if (Pos != std::string::npos)
186 FU.resize(Pos);
187 if (FU == "LS") // LSUnit -> LSU
188 FU = "LSU";
189 OS << "/" << FU;
190
191 if (PI->Cycles > 1)
192 OS << "(" << PI->Cycles << "cyc)";
193 }
194
195 if (SC->NumMicroOps > 1)
196 OS << "/" << SC->NumMicroOps << "uops";
197 if (SC->BeginGroup && SC->EndGroup)
198 OS << "/GroupsAlone";
199 else if (SC->BeginGroup)
200 OS << "/BeginsGroup";
201 else if (SC->EndGroup)
202 OS << "/EndsGroup";
203 if (SU->isUnbuffered)
204 OS << "/Unbuffered";
205 if (has4RegOps(SU->getInstr()))
206 OS << "/4RegOps";
207 }
208
dumpCurrGroup(std::string Msg) const209 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
210 dbgs() << "++ " << Msg;
211 dbgs() << ": ";
212
213 if (CurGroupDbg.empty())
214 dbgs() << " <empty>\n";
215 else {
216 dbgs() << "{ " << CurGroupDbg << " }";
217 dbgs() << " (" << CurrGroupSize << " decoder slot"
218 << (CurrGroupSize > 1 ? "s":"")
219 << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
220 << ")\n";
221 }
222 }
223
dumpProcResourceCounters() const224 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
225 bool any = false;
226
227 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
228 if (ProcResourceCounters[i] > 0) {
229 any = true;
230 break;
231 }
232
233 if (!any)
234 return;
235
236 dbgs() << "++ | Resource counters: ";
237 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
238 if (ProcResourceCounters[i] > 0)
239 dbgs() << SchedModel->getProcResource(i)->Name
240 << ":" << ProcResourceCounters[i] << " ";
241 dbgs() << "\n";
242
243 if (CriticalResourceIdx != UINT_MAX)
244 dbgs() << "++ | Critical resource: "
245 << SchedModel->getProcResource(CriticalResourceIdx)->Name
246 << "\n";
247 }
248
dumpState() const249 void SystemZHazardRecognizer::dumpState() const {
250 dumpCurrGroup("| Current decoder group");
251 dbgs() << "++ | Current cycle index: "
252 << getCurrCycleIdx() << "\n";
253 dumpProcResourceCounters();
254 if (LastFPdOpCycleIdx != UINT_MAX)
255 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
256 }
257
258 #endif //NDEBUG
259
clearProcResCounters()260 void SystemZHazardRecognizer::clearProcResCounters() {
261 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
262 CriticalResourceIdx = UINT_MAX;
263 }
264
isBranchRetTrap(MachineInstr * MI)265 static inline bool isBranchRetTrap(MachineInstr *MI) {
266 return (MI->isBranch() || MI->isReturn() ||
267 MI->getOpcode() == SystemZ::CondTrap);
268 }
269
270 // Update state with SU as the next scheduled unit.
271 void SystemZHazardRecognizer::
EmitInstruction(SUnit * SU)272 EmitInstruction(SUnit *SU) {
273 const MCSchedClassDesc *SC = getSchedClass(SU);
274 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
275 dbgs() << "\n";);
276 LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
277
278 // If scheduling an SU that must begin a new decoder group, move on
279 // to next group.
280 if (!fitsIntoCurrentGroup(SU))
281 nextGroup();
282
283 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
284 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
285
286 LastEmittedMI = SU->getInstr();
287
288 // After returning from a call, we don't know much about the state.
289 if (SU->isCall) {
290 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
291 Reset();
292 LastEmittedMI = SU->getInstr();
293 return;
294 }
295
296 // Increase counter for execution unit(s).
297 for (TargetSchedModel::ProcResIter
298 PI = SchedModel->getWriteProcResBegin(SC),
299 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
300 // Don't handle FPd together with the other resources.
301 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
302 continue;
303 int &CurrCounter =
304 ProcResourceCounters[PI->ProcResourceIdx];
305 CurrCounter += PI->Cycles;
306 // Check if this is now the new critical resource.
307 if ((CurrCounter > ProcResCostLim) &&
308 (CriticalResourceIdx == UINT_MAX ||
309 (PI->ProcResourceIdx != CriticalResourceIdx &&
310 CurrCounter >
311 ProcResourceCounters[CriticalResourceIdx]))) {
312 LLVM_DEBUG(
313 dbgs() << "++ New critical resource: "
314 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
315 << "\n";);
316 CriticalResourceIdx = PI->ProcResourceIdx;
317 }
318 }
319
320 // Make note of an instruction that uses a blocking resource (FPd).
321 if (SU->isUnbuffered) {
322 LastFPdOpCycleIdx = getCurrCycleIdx(SU);
323 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
324 << "\n";);
325 }
326
327 // Insert SU into current group by increasing number of slots used
328 // in current group.
329 CurrGroupSize += getNumDecoderSlots(SU);
330 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
331 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
332 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
333 && "SU does not fit into decoder group!");
334
335 // Check if current group is now full/ended. If so, move on to next
336 // group to be ready to evaluate more candidates.
337 if (CurrGroupSize >= GroupLim || SC->EndGroup)
338 nextGroup();
339 }
340
groupingCost(SUnit * SU) const341 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
342 const MCSchedClassDesc *SC = getSchedClass(SU);
343 if (!SC->isValid())
344 return 0;
345
346 // If SU begins new group, it can either break a current group early
347 // or fit naturally if current group is empty (negative cost).
348 if (SC->BeginGroup) {
349 if (CurrGroupSize)
350 return 3 - CurrGroupSize;
351 return -1;
352 }
353
354 // Similarly, a group-ending SU may either fit well (last in group), or
355 // end the group prematurely.
356 if (SC->EndGroup) {
357 unsigned resultingGroupSize =
358 (CurrGroupSize + getNumDecoderSlots(SU));
359 if (resultingGroupSize < 3)
360 return (3 - resultingGroupSize);
361 return -1;
362 }
363
364 // An instruction with 4 register operands will not fit in last slot.
365 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
366 return 1;
367
368 // Most instructions can be placed in any decoder slot.
369 return 0;
370 }
371
isFPdOpPreferred_distance(SUnit * SU) const372 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
373 assert (SU->isUnbuffered);
374 // If this is the first FPd op, it should be scheduled high.
375 if (LastFPdOpCycleIdx == UINT_MAX)
376 return true;
377 // If this is not the first PFd op, it should go into the other side
378 // of the processor to use the other FPd unit there. This should
379 // generally happen if two FPd ops are placed with 2 other
380 // instructions between them (modulo 6).
381 unsigned SUCycleIdx = getCurrCycleIdx(SU);
382 if (LastFPdOpCycleIdx > SUCycleIdx)
383 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
384 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
385 }
386
387 int SystemZHazardRecognizer::
resourcesCost(SUnit * SU)388 resourcesCost(SUnit *SU) {
389 int Cost = 0;
390
391 const MCSchedClassDesc *SC = getSchedClass(SU);
392 if (!SC->isValid())
393 return 0;
394
395 // For a FPd op, either return min or max value as indicated by the
396 // distance to any prior FPd op.
397 if (SU->isUnbuffered)
398 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
399 // For other instructions, give a cost to the use of the critical resource.
400 else if (CriticalResourceIdx != UINT_MAX) {
401 for (TargetSchedModel::ProcResIter
402 PI = SchedModel->getWriteProcResBegin(SC),
403 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
404 if (PI->ProcResourceIdx == CriticalResourceIdx)
405 Cost = PI->Cycles;
406 }
407
408 return Cost;
409 }
410
emitInstruction(MachineInstr * MI,bool TakenBranch)411 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
412 bool TakenBranch) {
413 // Make a temporary SUnit.
414 SUnit SU(MI, 0);
415
416 // Set interesting flags.
417 SU.isCall = MI->isCall();
418
419 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
420 for (const MCWriteProcResEntry &PRE :
421 make_range(SchedModel->getWriteProcResBegin(SC),
422 SchedModel->getWriteProcResEnd(SC))) {
423 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
424 case 0:
425 SU.hasReservedResource = true;
426 break;
427 case 1:
428 SU.isUnbuffered = true;
429 break;
430 default:
431 break;
432 }
433 }
434
435 unsigned GroupSizeBeforeEmit = CurrGroupSize;
436 EmitInstruction(&SU);
437
438 if (!TakenBranch && isBranchRetTrap(MI)) {
439 // NT Branch on second slot ends group.
440 if (GroupSizeBeforeEmit == 1)
441 nextGroup();
442 }
443
444 if (TakenBranch && CurrGroupSize > 0)
445 nextGroup();
446
447 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
448 "Scheduler: unhandled terminator!");
449 }
450
451 void SystemZHazardRecognizer::
copyState(SystemZHazardRecognizer * Incoming)452 copyState(SystemZHazardRecognizer *Incoming) {
453 // Current decoder group
454 CurrGroupSize = Incoming->CurrGroupSize;
455 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
456
457 // Processor resources
458 ProcResourceCounters = Incoming->ProcResourceCounters;
459 CriticalResourceIdx = Incoming->CriticalResourceIdx;
460
461 // FPd
462 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
463 GrpCount = Incoming->GrpCount;
464 }
465