//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
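  // One bit per buffered processor resource consumed by this scheduling class.
  // Bits are set at the resource-state index computed from the resource mask.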
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
    return countPopulation(Elt.first) > 1;
  });
  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
  uint64_t ImpliedUsesOfResourceUnits = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);

      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) == 1) {
        // At simulation time, this resource group use will decay into a simple
        // use of the resource unit identified by `AvailableMask`.
        ImpliedUsesOfResourceUnits |= AvailableMask;
        UsedResourceUnits |= AvailableMask;
      }
    }

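    // Subtract the cycles consumed by this resource from every larger entry
    // that contains it (i.e. each group it belongs to), discounting cycles
    // already attributed through a "Super" resource, and record one more
    // consumed unit in each containing group.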
    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // Look for implicit uses of processor resource units. These are resource
  // units which are indirectly consumed by resource groups, and that must
  // always be available on instruction issue.
  while (ImpliedUsesOfResourceUnits) {
    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
    ImpliedUsesOfResourceUnits = 0;
    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
      ResourcePlusCycles &A = Worklist[I];
      if (!A.second.size())
        continue;

      uint64_t NormalizedMask = A.first;
      assert(countPopulation(NormalizedMask) > 1);
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) != 1)
        continue;

      UsedResourceUnits |= AvailableMask;
      ImpliedUsesOfResourceUnits |= AvailableMask;
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tImplicitly Used Units="
           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  // 1. The number of explicit and implicit register definitions in a MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  // 2. Uses start at index #(MCDesc.getNumDefs()).
  // 3. There can only be a single optional register definition, and it is
  //    either the last operand of the sequence (excluding extra operands
  //    contributed by variadic opcodes) or one of the explicit register
  //    definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>  (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
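  // Implicit definitions are appended after the explicit ones. The bitwise
  // complement of the implicit def index is stored in OpIndex to mark the
  // write as implicit.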
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

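  // Variadic operands contribute writes only if the opcode declares them as
  // definitions (variadicOpsAreDefs); otherwise they are treated as uses and
  // handled by populateReads.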
  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

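  // Symmetrically to populateWrites, variadic operands are treated as reads
  // unless the opcode declares them as definitions.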
  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

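  // An instruction that decodes to zero micro opcodes must not consume any
  // scheduler buffers or processor resources.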
  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
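  // Descriptors of non-variant, non-variadic opcodes are recyclable and cached
  // by opcode; variant descriptors are instead keyed by the specific MCInst.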
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

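  // If a recycle callback is registered, try to reuse a previously allocated
  // Instruction for this descriptor instead of creating a new one.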
  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
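  // Drop any leftover ReadState entries from the recycled instruction.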
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm