//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a wrapper around MCSchedModel that allows the interface
// to benefit from information currently only available in TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>

using namespace llvm;

static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
  cl::desc("Use TargetSchedModel for latency lookup"));

static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
  cl::desc("Use InstrItineraryData for latency lookup"));

bool TargetSchedModel::hasInstrSchedModel() const {
  return EnableSchedModel && SchedModel.hasInstrSchedModel();
}

bool TargetSchedModel::hasInstrItineraries() const {
  return EnableSchedItins && !InstrItins.isEmpty();
}

static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  // Dividend and Divisor will be naturally swapped as needed.
  while (Divisor) {
    unsigned Rem = Dividend % Divisor;
    Dividend = Divisor;
    Divisor = Rem;
  }
  return Dividend;
}

static unsigned lcm(unsigned A, unsigned B) {
  unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
  assert((LCM >= A && LCM >= B) && "LCM overflow");
  return LCM;
}

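// Initialize the machine model. Resource usage is normalized to a common
// unit: ResourceLCM is the least common multiple of the issue width and every
// resource's unit count, and each factor is "ResourceLCM / units", so that
// micro-op counts and resource cycles can be compared on one scale. As an
// illustrative example (not taken from any real target): with IssueWidth = 2
// and resources of 2 and 3 units, ResourceLCM = lcm(2, 2, 3) = 6,
// MicroOpFactor = 6 / 2 = 3, and the per-resource factors are 3 and 2.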
void TargetSchedModel::init(const MCSchedModel &sm,
                            const TargetSubtargetInfo *sti,
                            const TargetInstrInfo *tii) {
  SchedModel = sm;
  STI = sti;
  TII = tii;
  STI->initInstrItins(InstrItins);

  unsigned NumRes = SchedModel.getNumProcResourceKinds();
  ResourceFactors.resize(NumRes);
  ResourceLCM = SchedModel.IssueWidth;
  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
    if (NumUnits > 0)
      ResourceLCM = lcm(ResourceLCM, NumUnits);
  }
  MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
    ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
  }
}

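// Return the number of micro-ops this instruction decodes into. Prefer the
// itinerary data when present, then the per-operand machine model; otherwise
// assume a single micro-op for anything that is not transient.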
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
                                          const MCSchedClassDesc *SC) const {
  if (hasInstrItineraries()) {
    int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
    return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
  }
  if (hasInstrSchedModel()) {
    if (!SC)
      SC = resolveSchedClass(MI);
    if (SC->isValid())
      return SC->NumMicroOps;
  }
  return MI->isTransient() ? 0 : 1;
}

// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  return Cycles >= 0 ? Cycles : 1000;
}

/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {
  // Get the definition's scheduling class descriptor from this machine model.
  unsigned SchedClass = MI->getDesc().getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return SCDesc;

#ifndef NDEBUG
  unsigned NIter = 0;
#endif
  while (SCDesc->isVariant()) {
    assert(++NIter < 6 && "Variants are nested deeper than the magic number");

    SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
    SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  }
  return SCDesc;
}

/// Find the def index of this operand. This index maps to the machine model
/// and is independent of use operands. Def operands may be reordered with
/// uses or merged with uses without affecting the def index (e.g.
/// before/after regalloc). However, an instruction's def operands must never
/// be reordered with respect to each other.
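///
/// For example (hypothetical operand list): given operands
/// (def r0, use r1, def r2), DefOperIdx 2 maps to DefIdx 1, because exactly
/// one def precedes it.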
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
  unsigned DefIdx = 0;
  for (unsigned i = 0; i != DefOperIdx; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef())
      ++DefIdx;
  }
  return DefIdx;
}

/// Find the use index of this operand. This is independent of the
/// instruction's def operands.
///
/// Note that uses are not determined by the operand's isUse property, which
/// is simply the inverse of isDef. Here we consider any readsReg operand to be
/// a "use". The machine model allows an operand to be both a Def and Use.
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
  unsigned UseIdx = 0;
  for (unsigned i = 0; i != UseOperIdx; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.readsReg() && !MO.isDef())
      ++UseIdx;
  }
  return UseIdx;
}

// Top-level API for clients that know the operand indices.
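//
// With the per-operand machine model, the operand latency below is the def
// operand's write latency minus any ReadAdvance the use declares for that
// write resource, clamped at zero. As an illustrative example: a write
// latency of 4 cycles with a ReadAdvance of 2 yields an operand latency of 2.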
unsigned TargetSchedModel::computeOperandLatency(
  const MachineInstr *DefMI, unsigned DefOperIdx,
  const MachineInstr *UseMI, unsigned UseOperIdx) const {

  if (!hasInstrSchedModel() && !hasInstrItineraries())
    return TII->defaultDefLatency(SchedModel, *DefMI);

  if (hasInstrItineraries()) {
    int OperLatency = 0;
    if (UseMI) {
      OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
                                           *UseMI, UseOperIdx);
    } else {
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
    }
    if (OperLatency >= 0)
      return OperLatency;

    // No operand latency was found.
    unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);

    // Expected latency is the max of the stage latency and itinerary props.
    // Rather than directly querying InstrItins stage latency, we call a TII
    // hook to allow subtargets to specialize latency. This hook is only
    // applicable to the InstrItins model. InstrSchedModel should model all
    // special cases without TII hooks.
    InstrLatency =
        std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
    return InstrLatency;
  }
  // hasInstrSchedModel()
  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned WriteID = WLEntry->WriteResourceID;
    unsigned Latency = capLatency(WLEntry->Cycles);
    if (!UseMI)
      return Latency;

    // Lookup the use's latency adjustment in SubtargetInfo.
    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
    if (UseDesc->NumReadAdvanceEntries == 0)
      return Latency;
    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
      return 0;
    return Latency - Advance;
  }
  // If DefIdx does not exist in the model (e.g. implicit defs), then return
  // unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
      && SchedModel.isComplete()) {
    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
           << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
    llvm_unreachable("incomplete machine model");
  }
#endif
  // FIXME: Automatically giving all implicit defs defaultDefLatency is
  // undesirable. We should only do it for defs that are known to the MC
  // desc like flags. Truly implicit defs should get 1 cycle latency.
  return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
}

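// The latency of a scheduling class is the maximum write latency over all of
// its defs.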
unsigned
TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
  unsigned Latency = 0;
  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
       DefIdx != DefEnd; ++DefIdx) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(&SCDesc, DefIdx);
    Latency = std::max(Latency, capLatency(WLEntry->Cycles));
  }
  return Latency;
}

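// Compute latency from the opcode's scheduling class alone. This only works
// for valid, non-variant classes, since there is no MachineInstr available
// here to resolve a variant class.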
unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
  assert(hasInstrSchedModel() && "Only call this function with a SchedModel");

  unsigned SCIdx = TII->get(Opcode).getSchedClass();
  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);

  if (SCDesc->isValid() && !SCDesc->isVariant())
    return computeInstrLatency(*SCDesc);

  llvm_unreachable("No MI sched latency");
}

unsigned
TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
                                      bool UseDefaultDefLatency) const {
  // For the itinerary model, fall back to the old subtarget hook.
  // Allow subtargets to compute Bundle latencies outside the machine model.
  if (hasInstrItineraries() || MI->isBundle() ||
      (!hasInstrSchedModel() && !UseDefaultDefLatency))
    return TII->getInstrLatency(&InstrItins, *MI);

  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
    if (SCDesc->isValid())
      return computeInstrLatency(*SCDesc);
  }
  return TII->defaultDefLatency(SchedModel, *MI);
}

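// Compute the latency of an output (WAW) dependency. Out-of-order cpus can
// generally dispatch such writes in the same cycle (latency 0); writes to
// unbuffered resources, predicated defs, and in-order cpus are the
// exceptions.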
unsigned TargetSchedModel::
computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
                     const MachineInstr *DepMI) const {
  if (!SchedModel.isOutOfOrder())
    return 1;

  // An out-of-order processor can dispatch WAW dependencies in the same
  // cycle.

  // Treat predication as a data dependency for out-of-order cpus. In-order
  // cpus do not need to treat predicated writes specially.
  //
  // TODO: The following hack exists because predication passes do not
  // correctly append imp-use operands, and readsReg() strangely returns false
  // for predicated defs.
  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
  const MachineFunction &MF = *DefMI->getParent()->getParent();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
    return computeInstrLatency(DefMI);

  // If we have a per-operand scheduling model, check if this def is writing
  // an unbuffered resource. If so, it is treated like an in-order cpu.
  if (hasInstrSchedModel()) {
    const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
    if (SCDesc->isValid()) {
      for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
             *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
        if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
          return 1;
      }
    }
  }
  return 0;
}