1 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the InstrBuilder interface. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/MCA/InstrBuilder.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/WithColor.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #define DEBUG_TYPE "llvm-mca" 23 24 namespace llvm { 25 namespace mca { 26 27 InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, 28 const llvm::MCInstrInfo &mcii, 29 const llvm::MCRegisterInfo &mri, 30 const llvm::MCInstrAnalysis *mcia) 31 : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), 32 FirstReturnInst(true) { 33 const MCSchedModel &SM = STI.getSchedModel(); 34 ProcResourceMasks.resize(SM.getNumProcResourceKinds()); 35 computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); 36 } 37 38 static void initializeUsedResources(InstrDesc &ID, 39 const MCSchedClassDesc &SCDesc, 40 const MCSubtargetInfo &STI, 41 ArrayRef<uint64_t> ProcResourceMasks) { 42 const MCSchedModel &SM = STI.getSchedModel(); 43 44 // Populate resources consumed. 45 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>; 46 std::vector<ResourcePlusCycles> Worklist; 47 48 // Track cycles contributed by resources that are in a "Super" relationship. 49 // This is required if we want to correctly match the behavior of method 50 // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set 51 // of "consumed" processor resources and resource cycles, the logic in 52 // ExpandProcResource() doesn't update the number of resource cycles 53 // contributed by a "Super" resource to a group. 54 // We need to take this into account when we find that a processor resource is 55 // part of a group, and it is also used as the "Super" of other resources. 56 // This map stores the number of cycles contributed by sub-resources that are 57 // part of a "Super" resource. The key value is the "Super" resource mask ID. 58 DenseMap<uint64_t, unsigned> SuperResources; 59 60 unsigned NumProcResources = SM.getNumProcResourceKinds(); 61 APInt Buffers(NumProcResources, 0); 62 63 bool AllInOrderResources = true; 64 bool AnyDispatchHazards = false; 65 for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { 66 const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; 67 const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); 68 if (!PRE->Cycles) { 69 #ifndef NDEBUG 70 WithColor::warning() 71 << "Ignoring invalid write of zero cycles on processor resource " 72 << PR.Name << "\n"; 73 WithColor::note() << "found in scheduling class " << SCDesc.Name 74 << " (write index #" << I << ")\n"; 75 #endif 76 continue; 77 } 78 79 uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; 80 if (PR.BufferSize < 0) { 81 AllInOrderResources = false; 82 } else { 83 Buffers.setBit(getResourceStateIndex(Mask)); 84 AnyDispatchHazards |= (PR.BufferSize == 0); 85 AllInOrderResources &= (PR.BufferSize <= 1); 86 } 87 88 CycleSegment RCy(0, PRE->Cycles, false); 89 Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); 90 if (PR.SuperIdx) { 91 uint64_t Super = ProcResourceMasks[PR.SuperIdx]; 92 SuperResources[Super] += PRE->Cycles; 93 } 94 } 95 96 ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards; 97 98 // Sort elements by mask popcount, so that we prioritize resource units over 99 // resource groups, and smaller groups over larger groups. 100 sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { 101 unsigned popcntA = countPopulation(A.first); 102 unsigned popcntB = countPopulation(B.first); 103 if (popcntA < popcntB) 104 return true; 105 if (popcntA > popcntB) 106 return false; 107 return A.first < B.first; 108 }); 109 110 uint64_t UsedResourceUnits = 0; 111 uint64_t UsedResourceGroups = 0; 112 113 // Remove cycles contributed by smaller resources. 114 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { 115 ResourcePlusCycles &A = Worklist[I]; 116 if (!A.second.size()) { 117 assert(countPopulation(A.first) > 1 && "Expected a group!"); 118 UsedResourceGroups |= PowerOf2Floor(A.first); 119 continue; 120 } 121 122 ID.Resources.emplace_back(A); 123 uint64_t NormalizedMask = A.first; 124 if (countPopulation(A.first) == 1) { 125 UsedResourceUnits |= A.first; 126 } else { 127 // Remove the leading 1 from the resource group mask. 128 NormalizedMask ^= PowerOf2Floor(NormalizedMask); 129 UsedResourceGroups |= (A.first ^ NormalizedMask); 130 } 131 132 for (unsigned J = I + 1; J < E; ++J) { 133 ResourcePlusCycles &B = Worklist[J]; 134 if ((NormalizedMask & B.first) == NormalizedMask) { 135 B.second.CS.subtract(A.second.size() - SuperResources[A.first]); 136 if (countPopulation(B.first) > 1) 137 B.second.NumUnits++; 138 } 139 } 140 } 141 142 // A SchedWrite may specify a number of cycles in which a resource group 143 // is reserved. For example (on target x86; cpu Haswell): 144 // 145 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { 146 // let ResourceCycles = [2, 2, 3]; 147 // } 148 // 149 // This means: 150 // Resource units HWPort0 and HWPort1 are both used for 2cy. 151 // Resource group HWPort01 is the union of HWPort0 and HWPort1. 152 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 153 // will not be usable for 2 entire cycles from instruction issue. 154 // 155 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency 156 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an 157 // extra delay on top of the 2 cycles latency. 158 // During those extra cycles, HWPort01 is not usable by other instructions. 159 for (ResourcePlusCycles &RPC : ID.Resources) { 160 if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { 161 // Remove the leading 1 from the resource group mask. 162 uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); 163 if ((Mask & UsedResourceUnits) == Mask) 164 RPC.second.setReserved(); 165 } 166 } 167 168 // Identify extra buffers that are consumed through super resources. 169 for (const std::pair<uint64_t, unsigned> &SR : SuperResources) { 170 for (unsigned I = 1, E = NumProcResources; I < E; ++I) { 171 const MCProcResourceDesc &PR = *SM.getProcResource(I); 172 if (PR.BufferSize == -1) 173 continue; 174 175 uint64_t Mask = ProcResourceMasks[I]; 176 if (Mask != SR.first && ((Mask & SR.first) == SR.first)) 177 Buffers.setBit(getResourceStateIndex(Mask)); 178 } 179 } 180 181 ID.UsedBuffers = Buffers.getZExtValue(); 182 ID.UsedProcResUnits = UsedResourceUnits; 183 ID.UsedProcResGroups = UsedResourceGroups; 184 185 LLVM_DEBUG({ 186 for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) 187 dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", " 188 << "Reserved=" << R.second.isReserved() << ", " 189 << "#Units=" << R.second.NumUnits << ", " 190 << "cy=" << R.second.size() << '\n'; 191 uint64_t BufferIDs = ID.UsedBuffers; 192 while (BufferIDs) { 193 uint64_t Current = BufferIDs & (-BufferIDs); 194 dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; 195 BufferIDs ^= Current; 196 } 197 dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; 198 dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) 199 << '\n'; 200 }); 201 } 202 203 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, 204 const MCSchedClassDesc &SCDesc, 205 const MCSubtargetInfo &STI) { 206 if (MCDesc.isCall()) { 207 // We cannot estimate how long this call will take. 208 // Artificially set an arbitrarily high latency (100cy). 209 ID.MaxLatency = 100U; 210 return; 211 } 212 213 int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); 214 // If latency is unknown, then conservatively assume a MaxLatency of 100cy. 215 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); 216 } 217 218 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { 219 // Count register definitions, and skip non register operands in the process. 220 unsigned I, E; 221 unsigned NumExplicitDefs = MCDesc.getNumDefs(); 222 for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) { 223 const MCOperand &Op = MCI.getOperand(I); 224 if (Op.isReg()) 225 --NumExplicitDefs; 226 } 227 228 if (NumExplicitDefs) { 229 return make_error<InstructionError<MCInst>>( 230 "Expected more register operand definitions.", MCI); 231 } 232 233 if (MCDesc.hasOptionalDef()) { 234 // Always assume that the optional definition is the last operand. 235 const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1); 236 if (I == MCI.getNumOperands() || !Op.isReg()) { 237 std::string Message = 238 "expected a register operand for an optional definition. Instruction " 239 "has not been correctly analyzed."; 240 return make_error<InstructionError<MCInst>>(Message, MCI); 241 } 242 } 243 244 return ErrorSuccess(); 245 } 246 247 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, 248 unsigned SchedClassID) { 249 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 250 const MCSchedModel &SM = STI.getSchedModel(); 251 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 252 253 // Assumptions made by this algorithm: 254 // 1. The number of explicit and implicit register definitions in a MCInst 255 // matches the number of explicit and implicit definitions according to 256 // the opcode descriptor (MCInstrDesc). 257 // 2. Uses start at index #(MCDesc.getNumDefs()). 258 // 3. There can only be a single optional register definition, an it is 259 // always the last operand of the sequence (excluding extra operands 260 // contributed by variadic opcodes). 261 // 262 // These assumptions work quite well for most out-of-order in-tree targets 263 // like x86. This is mainly because the vast majority of instructions is 264 // expanded to MCInst using a straightforward lowering logic that preserves 265 // the ordering of the operands. 266 // 267 // About assumption 1. 268 // The algorithm allows non-register operands between register operand 269 // definitions. This helps to handle some special ARM instructions with 270 // implicit operand increment (-mtriple=armv7): 271 // 272 // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed 273 // @ <MCOperand Reg:59> 274 // @ <MCOperand Imm:0> (!!) 275 // @ <MCOperand Reg:67> 276 // @ <MCOperand Imm:0> 277 // @ <MCOperand Imm:14> 278 // @ <MCOperand Reg:0>> 279 // 280 // MCDesc reports: 281 // 6 explicit operands. 282 // 1 optional definition 283 // 2 explicit definitions (!!) 284 // 285 // The presence of an 'Imm' operand between the two register definitions 286 // breaks the assumption that "register definitions are always at the 287 // beginning of the operand sequence". 288 // 289 // To workaround this issue, this algorithm ignores (i.e. skips) any 290 // non-register operands between register definitions. The optional 291 // definition is still at index #(NumOperands-1). 292 // 293 // According to assumption 2. register reads start at #(NumExplicitDefs-1). 294 // That means, register R1 from the example is both read and written. 295 unsigned NumExplicitDefs = MCDesc.getNumDefs(); 296 unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); 297 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; 298 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; 299 if (MCDesc.hasOptionalDef()) 300 TotalDefs++; 301 302 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 303 ID.Writes.resize(TotalDefs + NumVariadicOps); 304 // Iterate over the operands list, and skip non-register operands. 305 // The first NumExplictDefs register operands are expected to be register 306 // definitions. 307 unsigned CurrentDef = 0; 308 unsigned i = 0; 309 for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { 310 const MCOperand &Op = MCI.getOperand(i); 311 if (!Op.isReg()) 312 continue; 313 314 WriteDescriptor &Write = ID.Writes[CurrentDef]; 315 Write.OpIndex = i; 316 if (CurrentDef < NumWriteLatencyEntries) { 317 const MCWriteLatencyEntry &WLE = 318 *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); 319 // Conservatively default to MaxLatency. 320 Write.Latency = 321 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 322 Write.SClassOrWriteResourceID = WLE.WriteResourceID; 323 } else { 324 // Assign a default latency for this write. 325 Write.Latency = ID.MaxLatency; 326 Write.SClassOrWriteResourceID = 0; 327 } 328 Write.IsOptionalDef = false; 329 LLVM_DEBUG({ 330 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex 331 << ", Latency=" << Write.Latency 332 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 333 }); 334 CurrentDef++; 335 } 336 337 assert(CurrentDef == NumExplicitDefs && 338 "Expected more register operand definitions."); 339 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { 340 unsigned Index = NumExplicitDefs + CurrentDef; 341 WriteDescriptor &Write = ID.Writes[Index]; 342 Write.OpIndex = ~CurrentDef; 343 Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; 344 if (Index < NumWriteLatencyEntries) { 345 const MCWriteLatencyEntry &WLE = 346 *STI.getWriteLatencyEntry(&SCDesc, Index); 347 // Conservatively default to MaxLatency. 348 Write.Latency = 349 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles); 350 Write.SClassOrWriteResourceID = WLE.WriteResourceID; 351 } else { 352 // Assign a default latency for this write. 353 Write.Latency = ID.MaxLatency; 354 Write.SClassOrWriteResourceID = 0; 355 } 356 357 Write.IsOptionalDef = false; 358 assert(Write.RegisterID != 0 && "Expected a valid phys register!"); 359 LLVM_DEBUG({ 360 dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex 361 << ", PhysReg=" << MRI.getName(Write.RegisterID) 362 << ", Latency=" << Write.Latency 363 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 364 }); 365 } 366 367 if (MCDesc.hasOptionalDef()) { 368 WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs]; 369 Write.OpIndex = MCDesc.getNumOperands() - 1; 370 // Assign a default latency for this write. 371 Write.Latency = ID.MaxLatency; 372 Write.SClassOrWriteResourceID = 0; 373 Write.IsOptionalDef = true; 374 LLVM_DEBUG({ 375 dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex 376 << ", Latency=" << Write.Latency 377 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 378 }); 379 } 380 381 if (!NumVariadicOps) 382 return; 383 384 // FIXME: if an instruction opcode is flagged 'mayStore', and it has no 385 // "unmodeledSideEffects', then this logic optimistically assumes that any 386 // extra register operands in the variadic sequence is not a register 387 // definition. 388 // 389 // Otherwise, we conservatively assume that any register operand from the 390 // variadic sequence is both a register read and a register write. 391 bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() && 392 !MCDesc.hasUnmodeledSideEffects(); 393 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef(); 394 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 395 I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) { 396 const MCOperand &Op = MCI.getOperand(OpIndex); 397 if (!Op.isReg()) 398 continue; 399 400 WriteDescriptor &Write = ID.Writes[CurrentDef]; 401 Write.OpIndex = OpIndex; 402 // Assign a default latency for this write. 403 Write.Latency = ID.MaxLatency; 404 Write.SClassOrWriteResourceID = 0; 405 Write.IsOptionalDef = false; 406 ++CurrentDef; 407 LLVM_DEBUG({ 408 dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex 409 << ", Latency=" << Write.Latency 410 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; 411 }); 412 } 413 414 ID.Writes.resize(CurrentDef); 415 } 416 417 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, 418 unsigned SchedClassID) { 419 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); 420 unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs(); 421 unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); 422 // Remove the optional definition. 423 if (MCDesc.hasOptionalDef()) 424 --NumExplicitUses; 425 unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands(); 426 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps; 427 ID.Reads.resize(TotalUses); 428 unsigned CurrentUse = 0; 429 for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses; 430 ++I, ++OpIndex) { 431 const MCOperand &Op = MCI.getOperand(OpIndex); 432 if (!Op.isReg()) 433 continue; 434 435 ReadDescriptor &Read = ID.Reads[CurrentUse]; 436 Read.OpIndex = OpIndex; 437 Read.UseIndex = I; 438 Read.SchedClassID = SchedClassID; 439 ++CurrentUse; 440 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex 441 << ", UseIndex=" << Read.UseIndex << '\n'); 442 } 443 444 // For the purpose of ReadAdvance, implicit uses come directly after explicit 445 // uses. The "UseIndex" must be updated according to that implicit layout. 446 for (unsigned I = 0; I < NumImplicitUses; ++I) { 447 ReadDescriptor &Read = ID.Reads[CurrentUse + I]; 448 Read.OpIndex = ~I; 449 Read.UseIndex = NumExplicitUses + I; 450 Read.RegisterID = MCDesc.getImplicitUses()[I]; 451 Read.SchedClassID = SchedClassID; 452 LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex 453 << ", UseIndex=" << Read.UseIndex << ", RegisterID=" 454 << MRI.getName(Read.RegisterID) << '\n'); 455 } 456 457 CurrentUse += NumImplicitUses; 458 459 // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no 460 // "unmodeledSideEffects", then this logic optimistically assumes that any 461 // extra register operands in the variadic sequence are not register 462 // definition. 463 464 bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && 465 !MCDesc.hasUnmodeledSideEffects(); 466 for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); 467 I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) { 468 const MCOperand &Op = MCI.getOperand(OpIndex); 469 if (!Op.isReg()) 470 continue; 471 472 ReadDescriptor &Read = ID.Reads[CurrentUse]; 473 Read.OpIndex = OpIndex; 474 Read.UseIndex = NumExplicitUses + NumImplicitUses + I; 475 Read.SchedClassID = SchedClassID; 476 ++CurrentUse; 477 LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex 478 << ", UseIndex=" << Read.UseIndex << '\n'); 479 } 480 481 ID.Reads.resize(CurrentUse); 482 } 483 484 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, 485 const MCInst &MCI) const { 486 if (ID.NumMicroOps != 0) 487 return ErrorSuccess(); 488 489 bool UsesMemory = ID.MayLoad || ID.MayStore; 490 bool UsesBuffers = ID.UsedBuffers; 491 bool UsesResources = !ID.Resources.empty(); 492 if (!UsesMemory && !UsesBuffers && !UsesResources) 493 return ErrorSuccess(); 494 495 StringRef Message; 496 if (UsesMemory) { 497 Message = "found an inconsistent instruction that decodes " 498 "into zero opcodes and that consumes load/store " 499 "unit resources."; 500 } else { 501 Message = "found an inconsistent instruction that decodes " 502 "to zero opcodes and that consumes scheduler " 503 "resources."; 504 } 505 506 return make_error<InstructionError<MCInst>>(Message, MCI); 507 } 508 509 Expected<const InstrDesc &> 510 InstrBuilder::createInstrDescImpl(const MCInst &MCI) { 511 assert(STI.getSchedModel().hasInstrSchedModel() && 512 "Itineraries are not yet supported!"); 513 514 // Obtain the instruction descriptor from the opcode. 515 unsigned short Opcode = MCI.getOpcode(); 516 const MCInstrDesc &MCDesc = MCII.get(Opcode); 517 const MCSchedModel &SM = STI.getSchedModel(); 518 519 // Then obtain the scheduling class information from the instruction. 520 unsigned SchedClassID = MCDesc.getSchedClass(); 521 bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant(); 522 523 // Try to solve variant scheduling classes. 524 if (IsVariant) { 525 unsigned CPUID = SM.getProcessorID(); 526 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) 527 SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); 528 529 if (!SchedClassID) { 530 return make_error<InstructionError<MCInst>>( 531 "unable to resolve scheduling class for write variant.", MCI); 532 } 533 } 534 535 // Check if this instruction is supported. Otherwise, report an error. 536 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); 537 if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { 538 return make_error<InstructionError<MCInst>>( 539 "found an unsupported instruction in the input assembly sequence.", 540 MCI); 541 } 542 543 LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); 544 LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); 545 546 // Create a new empty descriptor. 547 std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>(); 548 ID->NumMicroOps = SCDesc.NumMicroOps; 549 ID->SchedClassID = SchedClassID; 550 551 if (MCDesc.isCall() && FirstCallInst) { 552 // We don't correctly model calls. 553 WithColor::warning() << "found a call in the input assembly sequence.\n"; 554 WithColor::note() << "call instructions are not correctly modeled. " 555 << "Assume a latency of 100cy.\n"; 556 FirstCallInst = false; 557 } 558 559 if (MCDesc.isReturn() && FirstReturnInst) { 560 WithColor::warning() << "found a return instruction in the input" 561 << " assembly sequence.\n"; 562 WithColor::note() << "program counter updates are ignored.\n"; 563 FirstReturnInst = false; 564 } 565 566 ID->MayLoad = MCDesc.mayLoad(); 567 ID->MayStore = MCDesc.mayStore(); 568 ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); 569 ID->BeginGroup = SCDesc.BeginGroup; 570 ID->EndGroup = SCDesc.EndGroup; 571 572 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); 573 computeMaxLatency(*ID, MCDesc, SCDesc, STI); 574 575 if (Error Err = verifyOperands(MCDesc, MCI)) 576 return std::move(Err); 577 578 populateWrites(*ID, MCI, SchedClassID); 579 populateReads(*ID, MCI, SchedClassID); 580 581 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); 582 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); 583 584 // Sanity check on the instruction descriptor. 585 if (Error Err = verifyInstrDesc(*ID, MCI)) 586 return std::move(Err); 587 588 // Now add the new descriptor. 589 bool IsVariadic = MCDesc.isVariadic(); 590 if (!IsVariadic && !IsVariant) { 591 Descriptors[MCI.getOpcode()] = std::move(ID); 592 return *Descriptors[MCI.getOpcode()]; 593 } 594 595 VariantDescriptors[&MCI] = std::move(ID); 596 return *VariantDescriptors[&MCI]; 597 } 598 599 Expected<const InstrDesc &> 600 InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { 601 if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) 602 return *Descriptors[MCI.getOpcode()]; 603 604 if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) 605 return *VariantDescriptors[&MCI]; 606 607 return createInstrDescImpl(MCI); 608 } 609 610 Expected<std::unique_ptr<Instruction>> 611 InstrBuilder::createInstruction(const MCInst &MCI) { 612 Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); 613 if (!DescOrErr) 614 return DescOrErr.takeError(); 615 const InstrDesc &D = *DescOrErr; 616 std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D); 617 618 // Check if this is a dependency breaking instruction. 619 APInt Mask; 620 621 bool IsZeroIdiom = false; 622 bool IsDepBreaking = false; 623 if (MCIA) { 624 unsigned ProcID = STI.getSchedModel().getProcessorID(); 625 IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID); 626 IsDepBreaking = 627 IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID); 628 if (MCIA->isOptimizableRegisterMove(MCI, ProcID)) 629 NewIS->setOptimizableMove(); 630 } 631 632 // Initialize Reads first. 633 for (const ReadDescriptor &RD : D.Reads) { 634 int RegID = -1; 635 if (!RD.isImplicitRead()) { 636 // explicit read. 637 const MCOperand &Op = MCI.getOperand(RD.OpIndex); 638 // Skip non-register operands. 639 if (!Op.isReg()) 640 continue; 641 RegID = Op.getReg(); 642 } else { 643 // Implicit read. 644 RegID = RD.RegisterID; 645 } 646 647 // Skip invalid register operands. 648 if (!RegID) 649 continue; 650 651 // Okay, this is a register operand. Create a ReadState for it. 652 assert(RegID > 0 && "Invalid register ID found!"); 653 NewIS->getUses().emplace_back(RD, RegID); 654 ReadState &RS = NewIS->getUses().back(); 655 656 if (IsDepBreaking) { 657 // A mask of all zeroes means: explicit input operands are not 658 // independent. 659 if (Mask.isNullValue()) { 660 if (!RD.isImplicitRead()) 661 RS.setIndependentFromDef(); 662 } else { 663 // Check if this register operand is independent according to `Mask`. 664 // Note that Mask may not have enough bits to describe all explicit and 665 // implicit input operands. If this register operand doesn't have a 666 // corresponding bit in Mask, then conservatively assume that it is 667 // dependent. 668 if (Mask.getBitWidth() > RD.UseIndex) { 669 // Okay. This map describe register use `RD.UseIndex`. 670 if (Mask[RD.UseIndex]) 671 RS.setIndependentFromDef(); 672 } 673 } 674 } 675 } 676 677 // Early exit if there are no writes. 678 if (D.Writes.empty()) 679 return std::move(NewIS); 680 681 // Track register writes that implicitly clear the upper portion of the 682 // underlying super-registers using an APInt. 683 APInt WriteMask(D.Writes.size(), 0); 684 685 // Now query the MCInstrAnalysis object to obtain information about which 686 // register writes implicitly clear the upper portion of a super-register. 687 if (MCIA) 688 MCIA->clearsSuperRegisters(MRI, MCI, WriteMask); 689 690 // Initialize writes. 691 unsigned WriteIndex = 0; 692 for (const WriteDescriptor &WD : D.Writes) { 693 unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID 694 : MCI.getOperand(WD.OpIndex).getReg(); 695 // Check if this is a optional definition that references NoReg. 696 if (WD.IsOptionalDef && !RegID) { 697 ++WriteIndex; 698 continue; 699 } 700 701 assert(RegID && "Expected a valid register ID!"); 702 NewIS->getDefs().emplace_back(WD, RegID, 703 /* ClearsSuperRegs */ WriteMask[WriteIndex], 704 /* WritesZero */ IsZeroIdiom); 705 ++WriteIndex; 706 } 707 708 return std::move(NewIS); 709 } 710 } // namespace mca 711 } // namespace llvm 712