1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a hazard recognizer for the SystemZ scheduler. 11 // 12 // This class is used by the SystemZ scheduling strategy to maintain 13 // the state during scheduling, and provide cost functions for 14 // scheduling candidates. This includes: 15 // 16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and 17 // instructions that always begin a new group should be scheduled when 18 // the current decoder group is empty. 19 // * Processor resources usage. It is beneficial to balance the use of 20 // resources. 21 // 22 // A goal is to consider all instructions, also those outside of any 23 // scheduling region. Such instructions are "advanced" past and include 24 // single instructions before a scheduling region, branches etc. 25 // 26 // A block that has only one predecessor continues scheduling with the state 27 // of it (which may be updated by emitting branches). 28 // 29 // ===---------------------------------------------------------------------===// 30 31 #include "SystemZHazardRecognizer.h" 32 #include "llvm/ADT/Statistic.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "machine-scheduler" 37 38 // This is the limit of processor resource usage at which the 39 // scheduler should try to look for other instructions (not using the 40 // critical resource). 41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 42 cl::desc("The OOO window for processor " 43 "resources during scheduling."), 44 cl::init(8)); 45 46 unsigned SystemZHazardRecognizer:: 47 getNumDecoderSlots(SUnit *SU) const { 48 const MCSchedClassDesc *SC = getSchedClass(SU); 49 if (!SC->isValid()) 50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 51 52 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && 53 "Only cracked instruction can have 2 uops."); 54 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && 55 "Expanded instructions always group alone."); 56 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && 57 "Expanded instructions fill the group(s)."); 58 59 return SC->NumMicroOps; 60 } 61 62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { 63 unsigned Idx = CurrGroupSize; 64 if (GrpCount % 2) 65 Idx += 3; 66 67 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { 68 if (Idx == 1 || Idx == 2) 69 Idx = 3; 70 else if (Idx == 4 || Idx == 5) 71 Idx = 0; 72 } 73 74 return Idx; 75 } 76 77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 78 getHazardType(SUnit *m, int Stalls) { 79 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 80 } 81 82 void SystemZHazardRecognizer::Reset() { 83 CurrGroupSize = 0; 84 CurrGroupHas4RegOps = false; 85 clearProcResCounters(); 86 GrpCount = 0; 87 LastFPdOpCycleIdx = UINT_MAX; 88 LastEmittedMI = nullptr; 89 LLVM_DEBUG(CurGroupDbg = "";); 90 } 91 92 bool 93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 94 const MCSchedClassDesc *SC = getSchedClass(SU); 95 if (!SC->isValid()) 96 return true; 97 98 // A cracked instruction only fits into schedule if the current 99 // group is empty. 100 if (SC->BeginGroup) 101 return (CurrGroupSize == 0); 102 103 // An instruction with 4 register operands will not fit in last slot. 104 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && 105 "Current decoder group is already full!"); 106 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 107 return false; 108 109 // Since a full group is handled immediately in EmitInstruction(), 110 // SU should fit into current group. NumSlots should be 1 or 0, 111 // since it is not a cracked or expanded instruction. 112 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 113 "Expected normal instruction to fit in non-full group!"); 114 115 return true; 116 } 117 118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { 119 const MachineFunction &MF = *MI->getParent()->getParent(); 120 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 121 const MCInstrDesc &MID = MI->getDesc(); 122 unsigned Count = 0; 123 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { 124 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); 125 if (RC == nullptr) 126 continue; 127 if (OpIdx >= MID.getNumDefs() && 128 MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) 129 continue; 130 Count++; 131 } 132 return Count >= 4; 133 } 134 135 void SystemZHazardRecognizer::nextGroup() { 136 if (CurrGroupSize == 0) 137 return; 138 139 LLVM_DEBUG(dumpCurrGroup("Completed decode group")); 140 LLVM_DEBUG(CurGroupDbg = "";); 141 142 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); 143 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && 144 "Current decoder group bad."); 145 146 // Reset counter for next group. 147 CurrGroupSize = 0; 148 CurrGroupHas4RegOps = false; 149 150 GrpCount += ((unsigned) NumGroups); 151 152 // Decrease counters for execution units. 153 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 154 ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups) 155 ? (ProcResourceCounters[i] - NumGroups) 156 : 0); 157 158 // Clear CriticalResourceIdx if it is now below the threshold. 159 if (CriticalResourceIdx != UINT_MAX && 160 (ProcResourceCounters[CriticalResourceIdx] <= 161 ProcResCostLim)) 162 CriticalResourceIdx = UINT_MAX; 163 164 LLVM_DEBUG(dumpState();); 165 } 166 167 #ifndef NDEBUG // Debug output 168 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 169 OS << "SU(" << SU->NodeNum << "):"; 170 OS << TII->getName(SU->getInstr()->getOpcode()); 171 172 const MCSchedClassDesc *SC = getSchedClass(SU); 173 if (!SC->isValid()) 174 return; 175 176 for (TargetSchedModel::ProcResIter 177 PI = SchedModel->getWriteProcResBegin(SC), 178 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 179 const MCProcResourceDesc &PRD = 180 *SchedModel->getProcResource(PI->ProcResourceIdx); 181 std::string FU(PRD.Name); 182 // trim e.g. Z13_FXaUnit -> FXa 183 FU = FU.substr(FU.find("_") + 1); 184 size_t Pos = FU.find("Unit"); 185 if (Pos != std::string::npos) 186 FU.resize(Pos); 187 if (FU == "LS") // LSUnit -> LSU 188 FU = "LSU"; 189 OS << "/" << FU; 190 191 if (PI->Cycles > 1) 192 OS << "(" << PI->Cycles << "cyc)"; 193 } 194 195 if (SC->NumMicroOps > 1) 196 OS << "/" << SC->NumMicroOps << "uops"; 197 if (SC->BeginGroup && SC->EndGroup) 198 OS << "/GroupsAlone"; 199 else if (SC->BeginGroup) 200 OS << "/BeginsGroup"; 201 else if (SC->EndGroup) 202 OS << "/EndsGroup"; 203 if (SU->isUnbuffered) 204 OS << "/Unbuffered"; 205 if (has4RegOps(SU->getInstr())) 206 OS << "/4RegOps"; 207 } 208 209 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 210 dbgs() << "++ " << Msg; 211 dbgs() << ": "; 212 213 if (CurGroupDbg.empty()) 214 dbgs() << " <empty>\n"; 215 else { 216 dbgs() << "{ " << CurGroupDbg << " }"; 217 dbgs() << " (" << CurrGroupSize << " decoder slot" 218 << (CurrGroupSize > 1 ? "s":"") 219 << (CurrGroupHas4RegOps ? ", 4RegOps" : "") 220 << ")\n"; 221 } 222 } 223 224 void SystemZHazardRecognizer::dumpProcResourceCounters() const { 225 bool any = false; 226 227 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 228 if (ProcResourceCounters[i] > 0) { 229 any = true; 230 break; 231 } 232 233 if (!any) 234 return; 235 236 dbgs() << "++ | Resource counters: "; 237 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 238 if (ProcResourceCounters[i] > 0) 239 dbgs() << SchedModel->getProcResource(i)->Name 240 << ":" << ProcResourceCounters[i] << " "; 241 dbgs() << "\n"; 242 243 if (CriticalResourceIdx != UINT_MAX) 244 dbgs() << "++ | Critical resource: " 245 << SchedModel->getProcResource(CriticalResourceIdx)->Name 246 << "\n"; 247 } 248 249 void SystemZHazardRecognizer::dumpState() const { 250 dumpCurrGroup("| Current decoder group"); 251 dbgs() << "++ | Current cycle index: " 252 << getCurrCycleIdx() << "\n"; 253 dumpProcResourceCounters(); 254 if (LastFPdOpCycleIdx != UINT_MAX) 255 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; 256 } 257 258 #endif //NDEBUG 259 260 void SystemZHazardRecognizer::clearProcResCounters() { 261 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 262 CriticalResourceIdx = UINT_MAX; 263 } 264 265 static inline bool isBranchRetTrap(MachineInstr *MI) { 266 return (MI->isBranch() || MI->isReturn() || 267 MI->getOpcode() == SystemZ::CondTrap); 268 } 269 270 // Update state with SU as the next scheduled unit. 271 void SystemZHazardRecognizer:: 272 EmitInstruction(SUnit *SU) { 273 const MCSchedClassDesc *SC = getSchedClass(SU); 274 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 275 dbgs() << "\n";); 276 LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); 277 278 // If scheduling an SU that must begin a new decoder group, move on 279 // to next group. 280 if (!fitsIntoCurrentGroup(SU)) 281 nextGroup(); 282 283 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg); 284 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); 285 286 LastEmittedMI = SU->getInstr(); 287 288 // After returning from a call, we don't know much about the state. 289 if (SU->isCall) { 290 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";); 291 Reset(); 292 LastEmittedMI = SU->getInstr(); 293 return; 294 } 295 296 // Increase counter for execution unit(s). 297 for (TargetSchedModel::ProcResIter 298 PI = SchedModel->getWriteProcResBegin(SC), 299 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 300 // Don't handle FPd together with the other resources. 301 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 302 continue; 303 int &CurrCounter = 304 ProcResourceCounters[PI->ProcResourceIdx]; 305 CurrCounter += PI->Cycles; 306 // Check if this is now the new critical resource. 307 if ((CurrCounter > ProcResCostLim) && 308 (CriticalResourceIdx == UINT_MAX || 309 (PI->ProcResourceIdx != CriticalResourceIdx && 310 CurrCounter > 311 ProcResourceCounters[CriticalResourceIdx]))) { 312 LLVM_DEBUG( 313 dbgs() << "++ New critical resource: " 314 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 315 << "\n";); 316 CriticalResourceIdx = PI->ProcResourceIdx; 317 } 318 } 319 320 // Make note of an instruction that uses a blocking resource (FPd). 321 if (SU->isUnbuffered) { 322 LastFPdOpCycleIdx = getCurrCycleIdx(SU); 323 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx 324 << "\n";); 325 } 326 327 // Insert SU into current group by increasing number of slots used 328 // in current group. 329 CurrGroupSize += getNumDecoderSlots(SU); 330 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); 331 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3); 332 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU)) 333 && "SU does not fit into decoder group!"); 334 335 // Check if current group is now full/ended. If so, move on to next 336 // group to be ready to evaluate more candidates. 337 if (CurrGroupSize >= GroupLim || SC->EndGroup) 338 nextGroup(); 339 } 340 341 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 342 const MCSchedClassDesc *SC = getSchedClass(SU); 343 if (!SC->isValid()) 344 return 0; 345 346 // If SU begins new group, it can either break a current group early 347 // or fit naturally if current group is empty (negative cost). 348 if (SC->BeginGroup) { 349 if (CurrGroupSize) 350 return 3 - CurrGroupSize; 351 return -1; 352 } 353 354 // Similarly, a group-ending SU may either fit well (last in group), or 355 // end the group prematurely. 356 if (SC->EndGroup) { 357 unsigned resultingGroupSize = 358 (CurrGroupSize + getNumDecoderSlots(SU)); 359 if (resultingGroupSize < 3) 360 return (3 - resultingGroupSize); 361 return -1; 362 } 363 364 // An instruction with 4 register operands will not fit in last slot. 365 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 366 return 1; 367 368 // Most instructions can be placed in any decoder slot. 369 return 0; 370 } 371 372 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { 373 assert (SU->isUnbuffered); 374 // If this is the first FPd op, it should be scheduled high. 375 if (LastFPdOpCycleIdx == UINT_MAX) 376 return true; 377 // If this is not the first PFd op, it should go into the other side 378 // of the processor to use the other FPd unit there. This should 379 // generally happen if two FPd ops are placed with 2 other 380 // instructions between them (modulo 6). 381 unsigned SUCycleIdx = getCurrCycleIdx(SU); 382 if (LastFPdOpCycleIdx > SUCycleIdx) 383 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); 384 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); 385 } 386 387 int SystemZHazardRecognizer:: 388 resourcesCost(SUnit *SU) { 389 int Cost = 0; 390 391 const MCSchedClassDesc *SC = getSchedClass(SU); 392 if (!SC->isValid()) 393 return 0; 394 395 // For a FPd op, either return min or max value as indicated by the 396 // distance to any prior FPd op. 397 if (SU->isUnbuffered) 398 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 399 // For other instructions, give a cost to the use of the critical resource. 400 else if (CriticalResourceIdx != UINT_MAX) { 401 for (TargetSchedModel::ProcResIter 402 PI = SchedModel->getWriteProcResBegin(SC), 403 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 404 if (PI->ProcResourceIdx == CriticalResourceIdx) 405 Cost = PI->Cycles; 406 } 407 408 return Cost; 409 } 410 411 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 412 bool TakenBranch) { 413 // Make a temporary SUnit. 414 SUnit SU(MI, 0); 415 416 // Set interesting flags. 417 SU.isCall = MI->isCall(); 418 419 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 420 for (const MCWriteProcResEntry &PRE : 421 make_range(SchedModel->getWriteProcResBegin(SC), 422 SchedModel->getWriteProcResEnd(SC))) { 423 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 424 case 0: 425 SU.hasReservedResource = true; 426 break; 427 case 1: 428 SU.isUnbuffered = true; 429 break; 430 default: 431 break; 432 } 433 } 434 435 unsigned GroupSizeBeforeEmit = CurrGroupSize; 436 EmitInstruction(&SU); 437 438 if (!TakenBranch && isBranchRetTrap(MI)) { 439 // NT Branch on second slot ends group. 440 if (GroupSizeBeforeEmit == 1) 441 nextGroup(); 442 } 443 444 if (TakenBranch && CurrGroupSize > 0) 445 nextGroup(); 446 447 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 448 "Scheduler: unhandled terminator!"); 449 } 450 451 void SystemZHazardRecognizer:: 452 copyState(SystemZHazardRecognizer *Incoming) { 453 // Current decoder group 454 CurrGroupSize = Incoming->CurrGroupSize; 455 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); 456 457 // Processor resources 458 ProcResourceCounters = Incoming->ProcResourceCounters; 459 CriticalResourceIdx = Incoming->CriticalResourceIdx; 460 461 // FPd 462 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 463 GrpCount = Incoming->GrpCount; 464 } 465