1 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a hazard recognizer for the SystemZ scheduler. 11 // 12 // This class is used by the SystemZ scheduling strategy to maintain 13 // the state during scheduling, and provide cost functions for 14 // scheduling candidates. This includes: 15 // 16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and 17 // instructions that always begin a new group should be scheduled when 18 // the current decoder group is empty. 19 // * Processor resources usage. It is beneficial to balance the use of 20 // resources. 21 // 22 // A goal is to consider all instructions, also those outside of any 23 // scheduling region. Such instructions are "advanced" past and include 24 // single instructions before a scheduling region, branches etc. 25 // 26 // A block that has only one predecessor continues scheduling with the state 27 // of it (which may be updated by emitting branches). 28 // 29 // ===---------------------------------------------------------------------===// 30 31 #include "SystemZHazardRecognizer.h" 32 #include "llvm/ADT/Statistic.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "machine-scheduler" 37 38 // This is the limit of processor resource usage at which the 39 // scheduler should try to look for other instructions (not using the 40 // critical resource). 41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, 42 cl::desc("The OOO window for processor " 43 "resources during scheduling."), 44 cl::init(8)); 45 46 unsigned SystemZHazardRecognizer:: 47 getNumDecoderSlots(SUnit *SU) const { 48 const MCSchedClassDesc *SC = getSchedClass(SU); 49 if (!SC->isValid()) 50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 51 52 assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && 53 "Only cracked instruction can have 2 uops."); 54 assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && 55 "Expanded instructions always group alone."); 56 assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && 57 "Expanded instructions fill the group(s)."); 58 59 return SC->NumMicroOps; 60 } 61 62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { 63 unsigned Idx = CurrGroupSize; 64 if (GrpCount % 2) 65 Idx += 3; 66 67 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { 68 if (Idx == 1 || Idx == 2) 69 Idx = 3; 70 else if (Idx == 4 || Idx == 5) 71 Idx = 0; 72 } 73 74 return Idx; 75 } 76 77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: 78 getHazardType(SUnit *m, int Stalls) { 79 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); 80 } 81 82 void SystemZHazardRecognizer::Reset() { 83 CurrGroupSize = 0; 84 CurrGroupHas4RegOps = false; 85 clearProcResCounters(); 86 GrpCount = 0; 87 LastFPdOpCycleIdx = UINT_MAX; 88 LastEmittedMI = nullptr; 89 LLVM_DEBUG(CurGroupDbg = "";); 90 } 91 92 bool 93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { 94 const MCSchedClassDesc *SC = getSchedClass(SU); 95 if (!SC->isValid()) 96 return true; 97 98 // A cracked instruction only fits into schedule if the current 99 // group is empty. 100 if (SC->BeginGroup) 101 return (CurrGroupSize == 0); 102 103 // An instruction with 4 register operands will not fit in last slot. 104 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && 105 "Current decoder group is already full!"); 106 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 107 return false; 108 109 // Since a full group is handled immediately in EmitInstruction(), 110 // SU should fit into current group. NumSlots should be 1 or 0, 111 // since it is not a cracked or expanded instruction. 112 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && 113 "Expected normal instruction to fit in non-full group!"); 114 115 return true; 116 } 117 118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { 119 const MachineFunction &MF = *MI->getParent()->getParent(); 120 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 121 const MCInstrDesc &MID = MI->getDesc(); 122 unsigned Count = 0; 123 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { 124 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); 125 if (RC == nullptr) 126 continue; 127 if (OpIdx >= MID.getNumDefs() && 128 MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) 129 continue; 130 Count++; 131 } 132 return Count >= 4; 133 } 134 135 void SystemZHazardRecognizer::nextGroup() { 136 if (CurrGroupSize == 0) 137 return; 138 139 LLVM_DEBUG(dumpCurrGroup("Completed decode group")); 140 LLVM_DEBUG(CurGroupDbg = "";); 141 142 int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); 143 assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && 144 "Current decoder group bad."); 145 146 // Reset counter for next group. 147 CurrGroupSize = 0; 148 CurrGroupHas4RegOps = false; 149 150 GrpCount += ((unsigned) NumGroups); 151 152 // Decrease counters for execution units by one. 153 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 154 if (ProcResourceCounters[i] > 0) 155 ProcResourceCounters[i] = 156 ((ProcResourceCounters[i] > NumGroups) ? 157 (ProcResourceCounters[i] - NumGroups) : 0); 158 159 // Clear CriticalResourceIdx if it is now below the threshold. 160 if (CriticalResourceIdx != UINT_MAX && 161 (ProcResourceCounters[CriticalResourceIdx] <= 162 ProcResCostLim)) 163 CriticalResourceIdx = UINT_MAX; 164 165 LLVM_DEBUG(dumpState();); 166 } 167 168 #ifndef NDEBUG // Debug output 169 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { 170 OS << "SU(" << SU->NodeNum << "):"; 171 OS << TII->getName(SU->getInstr()->getOpcode()); 172 173 const MCSchedClassDesc *SC = getSchedClass(SU); 174 if (!SC->isValid()) 175 return; 176 177 for (TargetSchedModel::ProcResIter 178 PI = SchedModel->getWriteProcResBegin(SC), 179 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 180 const MCProcResourceDesc &PRD = 181 *SchedModel->getProcResource(PI->ProcResourceIdx); 182 std::string FU(PRD.Name); 183 // trim e.g. Z13_FXaUnit -> FXa 184 FU = FU.substr(FU.find("_") + 1); 185 size_t Pos = FU.find("Unit"); 186 if (Pos != std::string::npos) 187 FU.resize(Pos); 188 if (FU == "LS") // LSUnit -> LSU 189 FU = "LSU"; 190 OS << "/" << FU; 191 192 if (PI->Cycles > 1) 193 OS << "(" << PI->Cycles << "cyc)"; 194 } 195 196 if (SC->NumMicroOps > 1) 197 OS << "/" << SC->NumMicroOps << "uops"; 198 if (SC->BeginGroup && SC->EndGroup) 199 OS << "/GroupsAlone"; 200 else if (SC->BeginGroup) 201 OS << "/BeginsGroup"; 202 else if (SC->EndGroup) 203 OS << "/EndsGroup"; 204 if (SU->isUnbuffered) 205 OS << "/Unbuffered"; 206 if (has4RegOps(SU->getInstr())) 207 OS << "/4RegOps"; 208 } 209 210 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { 211 dbgs() << "++ " << Msg; 212 dbgs() << ": "; 213 214 if (CurGroupDbg.empty()) 215 dbgs() << " <empty>\n"; 216 else { 217 dbgs() << "{ " << CurGroupDbg << " }"; 218 dbgs() << " (" << CurrGroupSize << " decoder slot" 219 << (CurrGroupSize > 1 ? "s":"") 220 << (CurrGroupHas4RegOps ? ", 4RegOps" : "") 221 << ")\n"; 222 } 223 } 224 225 void SystemZHazardRecognizer::dumpProcResourceCounters() const { 226 bool any = false; 227 228 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 229 if (ProcResourceCounters[i] > 0) { 230 any = true; 231 break; 232 } 233 234 if (!any) 235 return; 236 237 dbgs() << "++ | Resource counters: "; 238 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) 239 if (ProcResourceCounters[i] > 0) 240 dbgs() << SchedModel->getProcResource(i)->Name 241 << ":" << ProcResourceCounters[i] << " "; 242 dbgs() << "\n"; 243 244 if (CriticalResourceIdx != UINT_MAX) 245 dbgs() << "++ | Critical resource: " 246 << SchedModel->getProcResource(CriticalResourceIdx)->Name 247 << "\n"; 248 } 249 250 void SystemZHazardRecognizer::dumpState() const { 251 dumpCurrGroup("| Current decoder group"); 252 dbgs() << "++ | Current cycle index: " 253 << getCurrCycleIdx() << "\n"; 254 dumpProcResourceCounters(); 255 if (LastFPdOpCycleIdx != UINT_MAX) 256 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; 257 } 258 259 #endif //NDEBUG 260 261 void SystemZHazardRecognizer::clearProcResCounters() { 262 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); 263 CriticalResourceIdx = UINT_MAX; 264 } 265 266 static inline bool isBranchRetTrap(MachineInstr *MI) { 267 return (MI->isBranch() || MI->isReturn() || 268 MI->getOpcode() == SystemZ::CondTrap); 269 } 270 271 // Update state with SU as the next scheduled unit. 272 void SystemZHazardRecognizer:: 273 EmitInstruction(SUnit *SU) { 274 const MCSchedClassDesc *SC = getSchedClass(SU); 275 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); 276 dbgs() << "\n";); 277 LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); 278 279 // If scheduling an SU that must begin a new decoder group, move on 280 // to next group. 281 if (!fitsIntoCurrentGroup(SU)) 282 nextGroup(); 283 284 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg); 285 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); 286 287 LastEmittedMI = SU->getInstr(); 288 289 // After returning from a call, we don't know much about the state. 290 if (SU->isCall) { 291 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";); 292 Reset(); 293 LastEmittedMI = SU->getInstr(); 294 return; 295 } 296 297 // Increase counter for execution unit(s). 298 for (TargetSchedModel::ProcResIter 299 PI = SchedModel->getWriteProcResBegin(SC), 300 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 301 // Don't handle FPd together with the other resources. 302 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) 303 continue; 304 int &CurrCounter = 305 ProcResourceCounters[PI->ProcResourceIdx]; 306 CurrCounter += PI->Cycles; 307 // Check if this is now the new critical resource. 308 if ((CurrCounter > ProcResCostLim) && 309 (CriticalResourceIdx == UINT_MAX || 310 (PI->ProcResourceIdx != CriticalResourceIdx && 311 CurrCounter > 312 ProcResourceCounters[CriticalResourceIdx]))) { 313 LLVM_DEBUG( 314 dbgs() << "++ New critical resource: " 315 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name 316 << "\n";); 317 CriticalResourceIdx = PI->ProcResourceIdx; 318 } 319 } 320 321 // Make note of an instruction that uses a blocking resource (FPd). 322 if (SU->isUnbuffered) { 323 LastFPdOpCycleIdx = getCurrCycleIdx(SU); 324 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx 325 << "\n";); 326 } 327 328 // Insert SU into current group by increasing number of slots used 329 // in current group. 330 CurrGroupSize += getNumDecoderSlots(SU); 331 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); 332 unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3); 333 assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU)) 334 && "SU does not fit into decoder group!"); 335 336 // Check if current group is now full/ended. If so, move on to next 337 // group to be ready to evaluate more candidates. 338 if (CurrGroupSize >= GroupLim || SC->EndGroup) 339 nextGroup(); 340 } 341 342 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { 343 const MCSchedClassDesc *SC = getSchedClass(SU); 344 if (!SC->isValid()) 345 return 0; 346 347 // If SU begins new group, it can either break a current group early 348 // or fit naturally if current group is empty (negative cost). 349 if (SC->BeginGroup) { 350 if (CurrGroupSize) 351 return 3 - CurrGroupSize; 352 return -1; 353 } 354 355 // Similarly, a group-ending SU may either fit well (last in group), or 356 // end the group prematurely. 357 if (SC->EndGroup) { 358 unsigned resultingGroupSize = 359 (CurrGroupSize + getNumDecoderSlots(SU)); 360 if (resultingGroupSize < 3) 361 return (3 - resultingGroupSize); 362 return -1; 363 } 364 365 // An instruction with 4 register operands will not fit in last slot. 366 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) 367 return 1; 368 369 // Most instructions can be placed in any decoder slot. 370 return 0; 371 } 372 373 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { 374 assert (SU->isUnbuffered); 375 // If this is the first FPd op, it should be scheduled high. 376 if (LastFPdOpCycleIdx == UINT_MAX) 377 return true; 378 // If this is not the first PFd op, it should go into the other side 379 // of the processor to use the other FPd unit there. This should 380 // generally happen if two FPd ops are placed with 2 other 381 // instructions between them (modulo 6). 382 unsigned SUCycleIdx = getCurrCycleIdx(SU); 383 if (LastFPdOpCycleIdx > SUCycleIdx) 384 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); 385 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); 386 } 387 388 int SystemZHazardRecognizer:: 389 resourcesCost(SUnit *SU) { 390 int Cost = 0; 391 392 const MCSchedClassDesc *SC = getSchedClass(SU); 393 if (!SC->isValid()) 394 return 0; 395 396 // For a FPd op, either return min or max value as indicated by the 397 // distance to any prior FPd op. 398 if (SU->isUnbuffered) 399 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); 400 // For other instructions, give a cost to the use of the critical resource. 401 else if (CriticalResourceIdx != UINT_MAX) { 402 for (TargetSchedModel::ProcResIter 403 PI = SchedModel->getWriteProcResBegin(SC), 404 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) 405 if (PI->ProcResourceIdx == CriticalResourceIdx) 406 Cost = PI->Cycles; 407 } 408 409 return Cost; 410 } 411 412 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, 413 bool TakenBranch) { 414 // Make a temporary SUnit. 415 SUnit SU(MI, 0); 416 417 // Set interesting flags. 418 SU.isCall = MI->isCall(); 419 420 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); 421 for (const MCWriteProcResEntry &PRE : 422 make_range(SchedModel->getWriteProcResBegin(SC), 423 SchedModel->getWriteProcResEnd(SC))) { 424 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { 425 case 0: 426 SU.hasReservedResource = true; 427 break; 428 case 1: 429 SU.isUnbuffered = true; 430 break; 431 default: 432 break; 433 } 434 } 435 436 unsigned GroupSizeBeforeEmit = CurrGroupSize; 437 EmitInstruction(&SU); 438 439 if (!TakenBranch && isBranchRetTrap(MI)) { 440 // NT Branch on second slot ends group. 441 if (GroupSizeBeforeEmit == 1) 442 nextGroup(); 443 } 444 445 if (TakenBranch && CurrGroupSize > 0) 446 nextGroup(); 447 448 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && 449 "Scheduler: unhandled terminator!"); 450 } 451 452 void SystemZHazardRecognizer:: 453 copyState(SystemZHazardRecognizer *Incoming) { 454 // Current decoder group 455 CurrGroupSize = Incoming->CurrGroupSize; 456 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); 457 458 // Processor resources 459 ProcResourceCounters = Incoming->ProcResourceCounters; 460 CriticalResourceIdx = Incoming->CriticalResourceIdx; 461 462 // FPd 463 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; 464 GrpCount = Incoming->GrpCount; 465 } 466