1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a hazard recognizer for the SystemZ scheduler. 11 // 12 // This class is used by the SystemZ scheduling strategy to maintain 13 // the state during scheduling, and provide cost functions for 14 // scheduling candidates. This includes: 15 // 16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and 17 // instructions that always begin a new group should be scheduled when 18 // the current decoder group is empty. 19 // * Processor resources usage. It is beneficial to balance the use of 20 // resources. 21 // 22 // A goal is to consider all instructions, also those outside of any 23 // scheduling region. Such instructions are "advanced" past and include 24 // single instructions before a scheduling region, branches etc. 25 // 26 // A block that has only one predecessor continues scheduling with the state 27 // of it (which may be updated by emitting branches). 28 // 29 // ===---------------------------------------------------------------------===// 30 31 #include "SystemZHazardRecognizer.h" 32 #include "llvm/ADT/Statistic.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "machine-scheduler" 37 38 // This is the limit of processor resource usage at which the 39 // scheduler should try to look for other instructions (not using the 40 // critical resource). 41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 42 cl::desc("The OOO window for processor " 43 "resources during scheduling."), 44 cl::init(8)); 45 46 unsigned SystemZHazardRecognizer:: 47 getNumDecoderSlots(SUnit *SU) const { 48 const MCSchedClassDesc *SC = getSchedClass(SU); 49 if (!SC->isValid()) 50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 51 52 if (SC->BeginGroup) { 53 if (!SC->EndGroup) 54 return 2; // Cracked instruction 55 else 56 return 3; // Expanded/group-alone instruction 57 } 58 59 return 1; // Normal instruction 60 } 61 62 unsigned SystemZHazardRecognizer::getCurrCycleIdx() const { 63 unsigned Idx = CurrGroupSize; 64 if (GrpCount % 2) 65 Idx += 3; 66 return Idx; 67 } 68 69 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 70 getHazardType(SUnit *m, int Stalls) { 71 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 72 } 73 74 void SystemZHazardRecognizer::Reset() { 75 CurrGroupSize = 0; 76 clearProcResCounters(); 77 GrpCount = 0; 78 LastFPdOpCycleIdx = UINT_MAX; 79 LastEmittedMI = nullptr; 80 DEBUG(CurGroupDbg = "";); 81 } 82 83 bool 84 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 85 const MCSchedClassDesc *SC = getSchedClass(SU); 86 if (!SC->isValid()) 87 return true; 88 89 // A cracked instruction only fits into schedule if the current 90 // group is empty. 91 if (SC->BeginGroup) 92 return (CurrGroupSize == 0); 93 94 // Since a full group is handled immediately in EmitInstruction(), 95 // SU should fit into current group. NumSlots should be 1 or 0, 96 // since it is not a cracked or expanded instruction. 97 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 98 "Expected normal instruction to fit in non-full group!"); 99 100 return true; 101 } 102 103 void SystemZHazardRecognizer::nextGroup() { 104 if (CurrGroupSize == 0) 105 return; 106 107 DEBUG(dumpCurrGroup("Completed decode group")); 108 DEBUG(CurGroupDbg = "";); 109 110 GrpCount++; 111 112 // Reset counter for next group. 113 CurrGroupSize = 0; 114 115 // Decrease counters for execution units by one. 116 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 117 if (ProcResourceCounters[i] > 0) 118 ProcResourceCounters[i]--; 119 120 // Clear CriticalResourceIdx if it is now below the threshold. 121 if (CriticalResourceIdx != UINT_MAX && 122 (ProcResourceCounters[CriticalResourceIdx] <= 123 ProcResCostLim)) 124 CriticalResourceIdx = UINT_MAX; 125 126 DEBUG(dumpState();); 127 } 128 129 #ifndef NDEBUG // Debug output 130 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 131 OS << "SU(" << SU->NodeNum << "):"; 132 OS << TII->getName(SU->getInstr()->getOpcode()); 133 134 const MCSchedClassDesc *SC = getSchedClass(SU); 135 if (!SC->isValid()) 136 return; 137 138 for (TargetSchedModel::ProcResIter 139 PI = SchedModel->getWriteProcResBegin(SC), 140 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 141 const MCProcResourceDesc &PRD = 142 *SchedModel->getProcResource(PI->ProcResourceIdx); 143 std::string FU(PRD.Name); 144 // trim e.g. Z13_FXaUnit -> FXa 145 FU = FU.substr(FU.find("_") + 1); 146 FU.resize(FU.find("Unit")); 147 OS << "/" << FU; 148 149 if (PI->Cycles > 1) 150 OS << "(" << PI->Cycles << "cyc)"; 151 } 152 153 if (SC->NumMicroOps > 1) 154 OS << "/" << SC->NumMicroOps << "uops"; 155 if (SC->BeginGroup && SC->EndGroup) 156 OS << "/GroupsAlone"; 157 else if (SC->BeginGroup) 158 OS << "/BeginsGroup"; 159 else if (SC->EndGroup) 160 OS << "/EndsGroup"; 161 if (SU->isUnbuffered) 162 OS << "/Unbuffered"; 163 } 164 165 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 166 dbgs() << "++ " << Msg; 167 dbgs() << ": "; 168 169 if (CurGroupDbg.empty()) 170 dbgs() << " <empty>\n"; 171 else { 172 dbgs() << "{ " << CurGroupDbg << " }"; 173 dbgs() << " (" << CurrGroupSize << " decoder slot" 174 << (CurrGroupSize > 1 ? "s":"") 175 << ")\n"; 176 } 177 } 178 179 void SystemZHazardRecognizer::dumpProcResourceCounters() const { 180 bool any = false; 181 182 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 183 if (ProcResourceCounters[i] > 0) { 184 any = true; 185 break; 186 } 187 188 if (!any) 189 return; 190 191 dbgs() << "++ | Resource counters: "; 192 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 193 if (ProcResourceCounters[i] > 0) 194 dbgs() << SchedModel->getProcResource(i)->Name 195 << ":" << ProcResourceCounters[i] << " "; 196 dbgs() << "\n"; 197 198 if (CriticalResourceIdx != UINT_MAX) 199 dbgs() << "++ | Critical resource: " 200 << SchedModel->getProcResource(CriticalResourceIdx)->Name 201 << "\n"; 202 } 203 204 void SystemZHazardRecognizer::dumpState() const { 205 dumpCurrGroup("| Current decoder group"); 206 dbgs() << "++ | Current cycle index: " 207 << getCurrCycleIdx() << "\n"; 208 dumpProcResourceCounters(); 209 if (LastFPdOpCycleIdx != UINT_MAX) 210 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; 211 } 212 213 #endif //NDEBUG 214 215 void SystemZHazardRecognizer::clearProcResCounters() { 216 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 217 CriticalResourceIdx = UINT_MAX; 218 } 219 220 static inline bool isBranchRetTrap(MachineInstr *MI) { 221 return (MI->isBranch() || MI->isReturn() || 222 MI->getOpcode() == SystemZ::CondTrap); 223 } 224 225 // Update state with SU as the next scheduled unit. 226 void SystemZHazardRecognizer:: 227 EmitInstruction(SUnit *SU) { 228 const MCSchedClassDesc *SC = getSchedClass(SU); 229 DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 230 dbgs() << "\n";); 231 DEBUG(dumpCurrGroup("Decode group before emission");); 232 233 // If scheduling an SU that must begin a new decoder group, move on 234 // to next group. 235 if (!fitsIntoCurrentGroup(SU)) 236 nextGroup(); 237 238 DEBUG(raw_string_ostream cgd(CurGroupDbg); 239 if (CurGroupDbg.length()) 240 cgd << ", "; 241 dumpSU(SU, cgd);); 242 243 LastEmittedMI = SU->getInstr(); 244 245 // After returning from a call, we don't know much about the state. 246 if (SU->isCall) { 247 DEBUG(dbgs() << "++ Clearing state after call.\n";); 248 clearProcResCounters(); 249 LastFPdOpCycleIdx = UINT_MAX; 250 CurrGroupSize += getNumDecoderSlots(SU); 251 assert (CurrGroupSize <= 3); 252 nextGroup(); 253 return; 254 } 255 256 // Increase counter for execution unit(s). 257 for (TargetSchedModel::ProcResIter 258 PI = SchedModel->getWriteProcResBegin(SC), 259 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 260 // Don't handle FPd together with the other resources. 261 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 262 continue; 263 int &CurrCounter = 264 ProcResourceCounters[PI->ProcResourceIdx]; 265 CurrCounter += PI->Cycles; 266 // Check if this is now the new critical resource. 267 if ((CurrCounter > ProcResCostLim) && 268 (CriticalResourceIdx == UINT_MAX || 269 (PI->ProcResourceIdx != CriticalResourceIdx && 270 CurrCounter > 271 ProcResourceCounters[CriticalResourceIdx]))) { 272 DEBUG(dbgs() << "++ New critical resource: " 273 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 274 << "\n";); 275 CriticalResourceIdx = PI->ProcResourceIdx; 276 } 277 } 278 279 // Make note of an instruction that uses a blocking resource (FPd). 280 if (SU->isUnbuffered) { 281 LastFPdOpCycleIdx = getCurrCycleIdx(); 282 DEBUG(dbgs() << "++ Last FPd cycle index: " 283 << LastFPdOpCycleIdx << "\n";); 284 } 285 286 // Insert SU into current group by increasing number of slots used 287 // in current group. 288 CurrGroupSize += getNumDecoderSlots(SU); 289 assert (CurrGroupSize <= 3); 290 291 // Check if current group is now full/ended. If so, move on to next 292 // group to be ready to evaluate more candidates. 293 if (CurrGroupSize == 3 || SC->EndGroup) 294 nextGroup(); 295 } 296 297 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 298 const MCSchedClassDesc *SC = getSchedClass(SU); 299 if (!SC->isValid()) 300 return 0; 301 302 // If SU begins new group, it can either break a current group early 303 // or fit naturally if current group is empty (negative cost). 304 if (SC->BeginGroup) { 305 if (CurrGroupSize) 306 return 3 - CurrGroupSize; 307 return -1; 308 } 309 310 // Similarly, a group-ending SU may either fit well (last in group), or 311 // end the group prematurely. 312 if (SC->EndGroup) { 313 unsigned resultingGroupSize = 314 (CurrGroupSize + getNumDecoderSlots(SU)); 315 if (resultingGroupSize < 3) 316 return (3 - resultingGroupSize); 317 return -1; 318 } 319 320 // Most instructions can be placed in any decoder slot. 321 return 0; 322 } 323 324 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) { 325 assert (SU->isUnbuffered); 326 // If this is the first FPd op, it should be scheduled high. 327 if (LastFPdOpCycleIdx == UINT_MAX) 328 return true; 329 // If this is not the first PFd op, it should go into the other side 330 // of the processor to use the other FPd unit there. This should 331 // generally happen if two FPd ops are placed with 2 other 332 // instructions between them (modulo 6). 333 if (LastFPdOpCycleIdx > getCurrCycleIdx()) 334 return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3); 335 return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3); 336 } 337 338 int SystemZHazardRecognizer:: 339 resourcesCost(SUnit *SU) { 340 int Cost = 0; 341 342 const MCSchedClassDesc *SC = getSchedClass(SU); 343 if (!SC->isValid()) 344 return 0; 345 346 // For a FPd op, either return min or max value as indicated by the 347 // distance to any prior FPd op. 348 if (SU->isUnbuffered) 349 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 350 // For other instructions, give a cost to the use of the critical resource. 351 else if (CriticalResourceIdx != UINT_MAX) { 352 for (TargetSchedModel::ProcResIter 353 PI = SchedModel->getWriteProcResBegin(SC), 354 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 355 if (PI->ProcResourceIdx == CriticalResourceIdx) 356 Cost = PI->Cycles; 357 } 358 359 return Cost; 360 } 361 362 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 363 bool TakenBranch) { 364 // Make a temporary SUnit. 365 SUnit SU(MI, 0); 366 367 // Set interesting flags. 368 SU.isCall = MI->isCall(); 369 370 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 371 for (const MCWriteProcResEntry &PRE : 372 make_range(SchedModel->getWriteProcResBegin(SC), 373 SchedModel->getWriteProcResEnd(SC))) { 374 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 375 case 0: 376 SU.hasReservedResource = true; 377 break; 378 case 1: 379 SU.isUnbuffered = true; 380 break; 381 default: 382 break; 383 } 384 } 385 386 unsigned GroupSizeBeforeEmit = CurrGroupSize; 387 EmitInstruction(&SU); 388 389 if (!TakenBranch && isBranchRetTrap(MI)) { 390 // NT Branch on second slot ends group. 391 if (GroupSizeBeforeEmit == 1) 392 nextGroup(); 393 } 394 395 if (TakenBranch && CurrGroupSize > 0) 396 nextGroup(); 397 398 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 399 "Scheduler: unhandled terminator!"); 400 } 401 402 void SystemZHazardRecognizer:: 403 copyState(SystemZHazardRecognizer *Incoming) { 404 // Current decoder group 405 CurrGroupSize = Incoming->CurrGroupSize; 406 DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); 407 408 // Processor resources 409 ProcResourceCounters = Incoming->ProcResourceCounters; 410 CriticalResourceIdx = Incoming->CriticalResourceIdx; 411 412 // FPd 413 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 414 GrpCount = Incoming->GrpCount; 415 } 416