1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a hazard recognizer for the SystemZ scheduler. 11 // 12 // This class is used by the SystemZ scheduling strategy to maintain 13 // the state during scheduling, and provide cost functions for 14 // scheduling candidates. This includes: 15 // 16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and 17 // instructions that always begin a new group should be scheduled when 18 // the current decoder group is empty. 19 // * Processor resources usage. It is beneficial to balance the use of 20 // resources. 21 // 22 // A goal is to consider all instructions, also those outside of any 23 // scheduling region. Such instructions are "advanced" past and include 24 // single instructions before a scheduling region, branches etc. 25 // 26 // A block that has only one predecessor continues scheduling with the state 27 // of it (which may be updated by emitting branches). 28 // 29 // ===---------------------------------------------------------------------===// 30 31 #include "SystemZHazardRecognizer.h" 32 #include "llvm/ADT/Statistic.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "machine-scheduler" 37 38 // This is the limit of processor resource usage at which the 39 // scheduler should try to look for other instructions (not using the 40 // critical resource). 41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 42 cl::desc("The OOO window for processor " 43 "resources during scheduling."), 44 cl::init(8)); 45 46 unsigned SystemZHazardRecognizer:: 47 getNumDecoderSlots(SUnit *SU) const { 48 const MCSchedClassDesc *SC = getSchedClass(SU); 49 if (!SC->isValid()) 50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 51 52 if (SC->BeginGroup) { 53 if (!SC->EndGroup) 54 return 2; // Cracked instruction 55 else 56 return 3; // Expanded/group-alone instruction 57 } 58 59 return 1; // Normal instruction 60 } 61 62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { 63 unsigned Idx = CurrGroupSize; 64 if (GrpCount % 2) 65 Idx += 3; 66 67 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { 68 if (Idx == 1 || Idx == 2) 69 Idx = 3; 70 else if (Idx == 4 || Idx == 5) 71 Idx = 0; 72 } 73 74 return Idx; 75 } 76 77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 78 getHazardType(SUnit *m, int Stalls) { 79 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 80 } 81 82 void SystemZHazardRecognizer::Reset() { 83 CurrGroupSize = 0; 84 clearProcResCounters(); 85 GrpCount = 0; 86 LastFPdOpCycleIdx = UINT_MAX; 87 LastEmittedMI = nullptr; 88 LLVM_DEBUG(CurGroupDbg = "";); 89 } 90 91 bool 92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 93 const MCSchedClassDesc *SC = getSchedClass(SU); 94 if (!SC->isValid()) 95 return true; 96 97 // A cracked instruction only fits into schedule if the current 98 // group is empty. 99 if (SC->BeginGroup) 100 return (CurrGroupSize == 0); 101 102 // Since a full group is handled immediately in EmitInstruction(), 103 // SU should fit into current group. NumSlots should be 1 or 0, 104 // since it is not a cracked or expanded instruction. 105 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 106 "Expected normal instruction to fit in non-full group!"); 107 108 return true; 109 } 110 111 void SystemZHazardRecognizer::nextGroup() { 112 if (CurrGroupSize == 0) 113 return; 114 115 LLVM_DEBUG(dumpCurrGroup("Completed decode group")); 116 LLVM_DEBUG(CurGroupDbg = "";); 117 118 GrpCount++; 119 120 // Reset counter for next group. 121 CurrGroupSize = 0; 122 123 // Decrease counters for execution units by one. 124 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 125 if (ProcResourceCounters[i] > 0) 126 ProcResourceCounters[i]--; 127 128 // Clear CriticalResourceIdx if it is now below the threshold. 129 if (CriticalResourceIdx != UINT_MAX && 130 (ProcResourceCounters[CriticalResourceIdx] <= 131 ProcResCostLim)) 132 CriticalResourceIdx = UINT_MAX; 133 134 LLVM_DEBUG(dumpState();); 135 } 136 137 #ifndef NDEBUG // Debug output 138 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 139 OS << "SU(" << SU->NodeNum << "):"; 140 OS << TII->getName(SU->getInstr()->getOpcode()); 141 142 const MCSchedClassDesc *SC = getSchedClass(SU); 143 if (!SC->isValid()) 144 return; 145 146 for (TargetSchedModel::ProcResIter 147 PI = SchedModel->getWriteProcResBegin(SC), 148 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 149 const MCProcResourceDesc &PRD = 150 *SchedModel->getProcResource(PI->ProcResourceIdx); 151 std::string FU(PRD.Name); 152 // trim e.g. Z13_FXaUnit -> FXa 153 FU = FU.substr(FU.find("_") + 1); 154 size_t Pos = FU.find("Unit"); 155 if (Pos != std::string::npos) 156 FU.resize(Pos); 157 if (FU == "LS") // LSUnit -> LSU 158 FU = "LSU"; 159 OS << "/" << FU; 160 161 if (PI->Cycles > 1) 162 OS << "(" << PI->Cycles << "cyc)"; 163 } 164 165 if (SC->NumMicroOps > 1) 166 OS << "/" << SC->NumMicroOps << "uops"; 167 if (SC->BeginGroup && SC->EndGroup) 168 OS << "/GroupsAlone"; 169 else if (SC->BeginGroup) 170 OS << "/BeginsGroup"; 171 else if (SC->EndGroup) 172 OS << "/EndsGroup"; 173 if (SU->isUnbuffered) 174 OS << "/Unbuffered"; 175 } 176 177 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 178 dbgs() << "++ " << Msg; 179 dbgs() << ": "; 180 181 if (CurGroupDbg.empty()) 182 dbgs() << " <empty>\n"; 183 else { 184 dbgs() << "{ " << CurGroupDbg << " }"; 185 dbgs() << " (" << CurrGroupSize << " decoder slot" 186 << (CurrGroupSize > 1 ? "s":"") 187 << ")\n"; 188 } 189 } 190 191 void SystemZHazardRecognizer::dumpProcResourceCounters() const { 192 bool any = false; 193 194 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 195 if (ProcResourceCounters[i] > 0) { 196 any = true; 197 break; 198 } 199 200 if (!any) 201 return; 202 203 dbgs() << "++ | Resource counters: "; 204 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 205 if (ProcResourceCounters[i] > 0) 206 dbgs() << SchedModel->getProcResource(i)->Name 207 << ":" << ProcResourceCounters[i] << " "; 208 dbgs() << "\n"; 209 210 if (CriticalResourceIdx != UINT_MAX) 211 dbgs() << "++ | Critical resource: " 212 << SchedModel->getProcResource(CriticalResourceIdx)->Name 213 << "\n"; 214 } 215 216 void SystemZHazardRecognizer::dumpState() const { 217 dumpCurrGroup("| Current decoder group"); 218 dbgs() << "++ | Current cycle index: " 219 << getCurrCycleIdx() << "\n"; 220 dumpProcResourceCounters(); 221 if (LastFPdOpCycleIdx != UINT_MAX) 222 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; 223 } 224 225 #endif //NDEBUG 226 227 void SystemZHazardRecognizer::clearProcResCounters() { 228 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 229 CriticalResourceIdx = UINT_MAX; 230 } 231 232 static inline bool isBranchRetTrap(MachineInstr *MI) { 233 return (MI->isBranch() || MI->isReturn() || 234 MI->getOpcode() == SystemZ::CondTrap); 235 } 236 237 // Update state with SU as the next scheduled unit. 238 void SystemZHazardRecognizer:: 239 EmitInstruction(SUnit *SU) { 240 const MCSchedClassDesc *SC = getSchedClass(SU); 241 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 242 dbgs() << "\n";); 243 LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); 244 245 // If scheduling an SU that must begin a new decoder group, move on 246 // to next group. 247 if (!fitsIntoCurrentGroup(SU)) 248 nextGroup(); 249 250 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg); 251 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); 252 253 LastEmittedMI = SU->getInstr(); 254 255 // After returning from a call, we don't know much about the state. 256 if (SU->isCall) { 257 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";); 258 Reset(); 259 LastEmittedMI = SU->getInstr(); 260 return; 261 } 262 263 // Increase counter for execution unit(s). 264 for (TargetSchedModel::ProcResIter 265 PI = SchedModel->getWriteProcResBegin(SC), 266 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 267 // Don't handle FPd together with the other resources. 268 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 269 continue; 270 int &CurrCounter = 271 ProcResourceCounters[PI->ProcResourceIdx]; 272 CurrCounter += PI->Cycles; 273 // Check if this is now the new critical resource. 274 if ((CurrCounter > ProcResCostLim) && 275 (CriticalResourceIdx == UINT_MAX || 276 (PI->ProcResourceIdx != CriticalResourceIdx && 277 CurrCounter > 278 ProcResourceCounters[CriticalResourceIdx]))) { 279 LLVM_DEBUG( 280 dbgs() << "++ New critical resource: " 281 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 282 << "\n";); 283 CriticalResourceIdx = PI->ProcResourceIdx; 284 } 285 } 286 287 // Make note of an instruction that uses a blocking resource (FPd). 288 if (SU->isUnbuffered) { 289 LastFPdOpCycleIdx = getCurrCycleIdx(SU); 290 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx 291 << "\n";); 292 } 293 294 // Insert SU into current group by increasing number of slots used 295 // in current group. 296 CurrGroupSize += getNumDecoderSlots(SU); 297 assert (CurrGroupSize <= 3); 298 299 // Check if current group is now full/ended. If so, move on to next 300 // group to be ready to evaluate more candidates. 301 if (CurrGroupSize == 3 || SC->EndGroup) 302 nextGroup(); 303 } 304 305 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 306 const MCSchedClassDesc *SC = getSchedClass(SU); 307 if (!SC->isValid()) 308 return 0; 309 310 // If SU begins new group, it can either break a current group early 311 // or fit naturally if current group is empty (negative cost). 312 if (SC->BeginGroup) { 313 if (CurrGroupSize) 314 return 3 - CurrGroupSize; 315 return -1; 316 } 317 318 // Similarly, a group-ending SU may either fit well (last in group), or 319 // end the group prematurely. 320 if (SC->EndGroup) { 321 unsigned resultingGroupSize = 322 (CurrGroupSize + getNumDecoderSlots(SU)); 323 if (resultingGroupSize < 3) 324 return (3 - resultingGroupSize); 325 return -1; 326 } 327 328 // Most instructions can be placed in any decoder slot. 329 return 0; 330 } 331 332 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { 333 assert (SU->isUnbuffered); 334 // If this is the first FPd op, it should be scheduled high. 335 if (LastFPdOpCycleIdx == UINT_MAX) 336 return true; 337 // If this is not the first PFd op, it should go into the other side 338 // of the processor to use the other FPd unit there. This should 339 // generally happen if two FPd ops are placed with 2 other 340 // instructions between them (modulo 6). 341 unsigned SUCycleIdx = getCurrCycleIdx(SU); 342 if (LastFPdOpCycleIdx > SUCycleIdx) 343 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); 344 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); 345 } 346 347 int SystemZHazardRecognizer:: 348 resourcesCost(SUnit *SU) { 349 int Cost = 0; 350 351 const MCSchedClassDesc *SC = getSchedClass(SU); 352 if (!SC->isValid()) 353 return 0; 354 355 // For a FPd op, either return min or max value as indicated by the 356 // distance to any prior FPd op. 357 if (SU->isUnbuffered) 358 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 359 // For other instructions, give a cost to the use of the critical resource. 360 else if (CriticalResourceIdx != UINT_MAX) { 361 for (TargetSchedModel::ProcResIter 362 PI = SchedModel->getWriteProcResBegin(SC), 363 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 364 if (PI->ProcResourceIdx == CriticalResourceIdx) 365 Cost = PI->Cycles; 366 } 367 368 return Cost; 369 } 370 371 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 372 bool TakenBranch) { 373 // Make a temporary SUnit. 374 SUnit SU(MI, 0); 375 376 // Set interesting flags. 377 SU.isCall = MI->isCall(); 378 379 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 380 for (const MCWriteProcResEntry &PRE : 381 make_range(SchedModel->getWriteProcResBegin(SC), 382 SchedModel->getWriteProcResEnd(SC))) { 383 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 384 case 0: 385 SU.hasReservedResource = true; 386 break; 387 case 1: 388 SU.isUnbuffered = true; 389 break; 390 default: 391 break; 392 } 393 } 394 395 unsigned GroupSizeBeforeEmit = CurrGroupSize; 396 EmitInstruction(&SU); 397 398 if (!TakenBranch && isBranchRetTrap(MI)) { 399 // NT Branch on second slot ends group. 400 if (GroupSizeBeforeEmit == 1) 401 nextGroup(); 402 } 403 404 if (TakenBranch && CurrGroupSize > 0) 405 nextGroup(); 406 407 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 408 "Scheduler: unhandled terminator!"); 409 } 410 411 void SystemZHazardRecognizer:: 412 copyState(SystemZHazardRecognizer *Incoming) { 413 // Current decoder group 414 CurrGroupSize = Incoming->CurrGroupSize; 415 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); 416 417 // Processor resources 418 ProcResourceCounters = Incoming->ProcResourceCounters; 419 CriticalResourceIdx = Incoming->CriticalResourceIdx; 420 421 // FPd 422 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 423 GrpCount = Incoming->GrpCount; 424 } 425