1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ResourcePriorityQueue class, which is a 10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to 11 // reduce the length of the critical path through the basic block 12 // on VLIW platforms. 13 // The scheduler is basically a top-down adaptable list scheduler with DFA 14 // resource tracking added to the cost function. 15 // DFA is queried as a state machine to model "packets/bundles" during 16 // schedule. Currently packets/bundles are discarded at the end of 17 // scheduling, affecting only order of instructions. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/CodeGen/ResourcePriorityQueue.h" 22 #include "llvm/CodeGen/MachineInstr.h" 23 #include "llvm/CodeGen/SelectionDAGNodes.h" 24 #include "llvm/CodeGen/TargetLowering.h" 25 #include "llvm/CodeGen/TargetSubtargetInfo.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Target/TargetMachine.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "scheduler" 34 35 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, 36 cl::ZeroOrMore, cl::init(false), 37 cl::desc("Disable use of DFA during scheduling")); 38 39 static cl::opt<int> RegPressureThreshold( 40 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), 41 cl::desc("Track reg pressure and switch priority to in-depth")); 42 43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) 44 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { 45 const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); 46 TRI = STI.getRegisterInfo(); 47 TLI = IS->TLI; 48 TII = STI.getInstrInfo(); 49 ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); 50 // This hard requirement could be relaxed, but for now 51 // do not let it proceed. 52 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 53 54 unsigned NumRC = TRI->getNumRegClasses(); 55 RegLimit.resize(NumRC); 56 RegPressure.resize(NumRC); 57 std::fill(RegLimit.begin(), RegLimit.end(), 0); 58 std::fill(RegPressure.begin(), RegPressure.end(), 0); 59 for (const TargetRegisterClass *RC : TRI->regclasses()) 60 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF); 61 62 ParallelLiveRanges = 0; 63 HorizontalVerticalBalance = 0; 64 } 65 66 unsigned 67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { 68 unsigned NumberDeps = 0; 69 for (SDep &Pred : SU->Preds) { 70 if (Pred.isCtrl()) 71 continue; 72 73 SUnit *PredSU = Pred.getSUnit(); 74 const SDNode *ScegN = PredSU->getNode(); 75 76 if (!ScegN) 77 continue; 78 79 // If value is passed to CopyToReg, it is probably 80 // live outside BB. 81 switch (ScegN->getOpcode()) { 82 default: break; 83 case ISD::TokenFactor: break; 84 case ISD::CopyFromReg: NumberDeps++; break; 85 case ISD::CopyToReg: break; 86 case ISD::INLINEASM: break; 87 } 88 if (!ScegN->isMachineOpcode()) 89 continue; 90 91 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 92 MVT VT = ScegN->getSimpleValueType(i); 93 if (TLI->isTypeLegal(VT) 94 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 95 NumberDeps++; 96 break; 97 } 98 } 99 } 100 return NumberDeps; 101 } 102 103 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, 104 unsigned RCId) { 105 unsigned NumberDeps = 0; 106 for (const SDep &Succ : SU->Succs) { 107 if (Succ.isCtrl()) 108 continue; 109 110 SUnit *SuccSU = Succ.getSUnit(); 111 const SDNode *ScegN = SuccSU->getNode(); 112 if (!ScegN) 113 continue; 114 115 // If value is passed to CopyToReg, it is probably 116 // live outside BB. 117 switch (ScegN->getOpcode()) { 118 default: break; 119 case ISD::TokenFactor: break; 120 case ISD::CopyFromReg: break; 121 case ISD::CopyToReg: NumberDeps++; break; 122 case ISD::INLINEASM: break; 123 } 124 if (!ScegN->isMachineOpcode()) 125 continue; 126 127 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 128 const SDValue &Op = ScegN->getOperand(i); 129 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 130 if (TLI->isTypeLegal(VT) 131 && (TLI->getRegClassFor(VT)->getID() == RCId)) { 132 NumberDeps++; 133 break; 134 } 135 } 136 } 137 return NumberDeps; 138 } 139 140 static unsigned numberCtrlDepsInSU(SUnit *SU) { 141 unsigned NumberDeps = 0; 142 for (const SDep &Succ : SU->Succs) 143 if (Succ.isCtrl()) 144 NumberDeps++; 145 146 return NumberDeps; 147 } 148 149 static unsigned numberCtrlPredInSU(SUnit *SU) { 150 unsigned NumberDeps = 0; 151 for (SDep &Pred : SU->Preds) 152 if (Pred.isCtrl()) 153 NumberDeps++; 154 155 return NumberDeps; 156 } 157 158 /// 159 /// Initialize nodes. 160 /// 161 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { 162 SUnits = &sunits; 163 NumNodesSolelyBlocking.resize(SUnits->size(), 0); 164 165 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 166 SUnit *SU = &(*SUnits)[i]; 167 initNumRegDefsLeft(SU); 168 SU->NodeQueueId = 0; 169 } 170 } 171 172 /// This heuristic is used if DFA scheduling is not desired 173 /// for some VLIW platform. 174 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { 175 // The isScheduleHigh flag allows nodes with wraparound dependencies that 176 // cannot easily be modeled as edges with latencies to be scheduled as 177 // soon as possible in a top-down schedule. 178 if (LHS->isScheduleHigh && !RHS->isScheduleHigh) 179 return false; 180 181 if (!LHS->isScheduleHigh && RHS->isScheduleHigh) 182 return true; 183 184 unsigned LHSNum = LHS->NodeNum; 185 unsigned RHSNum = RHS->NodeNum; 186 187 // The most important heuristic is scheduling the critical path. 188 unsigned LHSLatency = PQ->getLatency(LHSNum); 189 unsigned RHSLatency = PQ->getLatency(RHSNum); 190 if (LHSLatency < RHSLatency) return true; 191 if (LHSLatency > RHSLatency) return false; 192 193 // After that, if two nodes have identical latencies, look to see if one will 194 // unblock more other nodes than the other. 195 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); 196 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); 197 if (LHSBlocked < RHSBlocked) return true; 198 if (LHSBlocked > RHSBlocked) return false; 199 200 // Finally, just to provide a stable ordering, use the node number as a 201 // deciding factor. 202 return LHSNum < RHSNum; 203 } 204 205 206 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor 207 /// of SU, return it, otherwise return null. 208 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { 209 SUnit *OnlyAvailablePred = nullptr; 210 for (const SDep &Pred : SU->Preds) { 211 SUnit &PredSU = *Pred.getSUnit(); 212 if (!PredSU.isScheduled) { 213 // We found an available, but not scheduled, predecessor. If it's the 214 // only one we have found, keep track of it... otherwise give up. 215 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) 216 return nullptr; 217 OnlyAvailablePred = &PredSU; 218 } 219 } 220 return OnlyAvailablePred; 221 } 222 223 void ResourcePriorityQueue::push(SUnit *SU) { 224 // Look at all of the successors of this node. Count the number of nodes that 225 // this node is the sole unscheduled node for. 226 unsigned NumNodesBlocking = 0; 227 for (const SDep &Succ : SU->Succs) 228 if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) 229 ++NumNodesBlocking; 230 231 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; 232 Queue.push_back(SU); 233 } 234 235 /// Check if scheduling of this SU is possible 236 /// in the current packet. 237 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { 238 if (!SU || !SU->getNode()) 239 return false; 240 241 // If this is a compound instruction, 242 // it is likely to be a call. Do not delay it. 243 if (SU->getNode()->getGluedNode()) 244 return true; 245 246 // First see if the pipeline could receive this instruction 247 // in the current cycle. 248 if (SU->getNode()->isMachineOpcode()) 249 switch (SU->getNode()->getMachineOpcode()) { 250 default: 251 if (!ResourcesModel->canReserveResources(&TII->get( 252 SU->getNode()->getMachineOpcode()))) 253 return false; 254 break; 255 case TargetOpcode::EXTRACT_SUBREG: 256 case TargetOpcode::INSERT_SUBREG: 257 case TargetOpcode::SUBREG_TO_REG: 258 case TargetOpcode::REG_SEQUENCE: 259 case TargetOpcode::IMPLICIT_DEF: 260 break; 261 } 262 263 // Now see if there are no other dependencies 264 // to instructions already in the packet. 265 for (unsigned i = 0, e = Packet.size(); i != e; ++i) 266 for (const SDep &Succ : Packet[i]->Succs) { 267 // Since we do not add pseudos to packets, might as well 268 // ignore order deps. 269 if (Succ.isCtrl()) 270 continue; 271 272 if (Succ.getSUnit() == SU) 273 return false; 274 } 275 276 return true; 277 } 278 279 /// Keep track of available resources. 280 void ResourcePriorityQueue::reserveResources(SUnit *SU) { 281 // If this SU does not fit in the packet 282 // start a new one. 283 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { 284 ResourcesModel->clearResources(); 285 Packet.clear(); 286 } 287 288 if (SU->getNode() && SU->getNode()->isMachineOpcode()) { 289 switch (SU->getNode()->getMachineOpcode()) { 290 default: 291 ResourcesModel->reserveResources(&TII->get( 292 SU->getNode()->getMachineOpcode())); 293 break; 294 case TargetOpcode::EXTRACT_SUBREG: 295 case TargetOpcode::INSERT_SUBREG: 296 case TargetOpcode::SUBREG_TO_REG: 297 case TargetOpcode::REG_SEQUENCE: 298 case TargetOpcode::IMPLICIT_DEF: 299 break; 300 } 301 Packet.push_back(SU); 302 } 303 // Forcefully end packet for PseudoOps. 304 else { 305 ResourcesModel->clearResources(); 306 Packet.clear(); 307 } 308 309 // If packet is now full, reset the state so in the next cycle 310 // we start fresh. 311 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { 312 ResourcesModel->clearResources(); 313 Packet.clear(); 314 } 315 } 316 317 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { 318 int RegBalance = 0; 319 320 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 321 return RegBalance; 322 323 // Gen estimate. 324 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { 325 MVT VT = SU->getNode()->getSimpleValueType(i); 326 if (TLI->isTypeLegal(VT) 327 && TLI->getRegClassFor(VT) 328 && TLI->getRegClassFor(VT)->getID() == RCId) 329 RegBalance += numberRCValSuccInSU(SU, RCId); 330 } 331 // Kill estimate. 332 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { 333 const SDValue &Op = SU->getNode()->getOperand(i); 334 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 335 if (isa<ConstantSDNode>(Op.getNode())) 336 continue; 337 338 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) 339 && TLI->getRegClassFor(VT)->getID() == RCId) 340 RegBalance -= numberRCValPredInSU(SU, RCId); 341 } 342 return RegBalance; 343 } 344 345 /// Estimates change in reg pressure from this SU. 346 /// It is achieved by trivial tracking of defined 347 /// and used vregs in dependent instructions. 348 /// The RawPressure flag makes this function to ignore 349 /// existing reg file sizes, and report raw def/use 350 /// balance. 351 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { 352 int RegBalance = 0; 353 354 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) 355 return RegBalance; 356 357 if (RawPressure) { 358 for (const TargetRegisterClass *RC : TRI->regclasses()) 359 RegBalance += rawRegPressureDelta(SU, RC->getID()); 360 } 361 else { 362 for (const TargetRegisterClass *RC : TRI->regclasses()) { 363 if ((RegPressure[RC->getID()] + 364 rawRegPressureDelta(SU, RC->getID()) > 0) && 365 (RegPressure[RC->getID()] + 366 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) 367 RegBalance += rawRegPressureDelta(SU, RC->getID()); 368 } 369 } 370 371 return RegBalance; 372 } 373 374 // Constants used to denote relative importance of 375 // heuristic components for cost computation. 376 static const unsigned PriorityOne = 200; 377 static const unsigned PriorityTwo = 50; 378 static const unsigned PriorityThree = 15; 379 static const unsigned PriorityFour = 5; 380 static const unsigned ScaleOne = 20; 381 static const unsigned ScaleTwo = 10; 382 static const unsigned ScaleThree = 5; 383 static const unsigned FactorOne = 2; 384 385 /// Returns single number reflecting benefit of scheduling SU 386 /// in the current cycle. 387 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { 388 // Initial trivial priority. 389 int ResCount = 1; 390 391 // Do not waste time on a node that is already scheduled. 392 if (SU->isScheduled) 393 return ResCount; 394 395 // Forced priority is high. 396 if (SU->isScheduleHigh) 397 ResCount += PriorityOne; 398 399 // Adaptable scheduling 400 // A small, but very parallel 401 // region, where reg pressure is an issue. 402 if (HorizontalVerticalBalance > RegPressureThreshold) { 403 // Critical path first 404 ResCount += (SU->getHeight() * ScaleTwo); 405 // If resources are available for it, multiply the 406 // chance of scheduling. 407 if (isResourceAvailable(SU)) 408 ResCount <<= FactorOne; 409 410 // Consider change to reg pressure from scheduling 411 // this SU. 412 ResCount -= (regPressureDelta(SU,true) * ScaleOne); 413 } 414 // Default heuristic, greeady and 415 // critical path driven. 416 else { 417 // Critical path first. 418 ResCount += (SU->getHeight() * ScaleTwo); 419 // Now see how many instructions is blocked by this SU. 420 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); 421 // If resources are available for it, multiply the 422 // chance of scheduling. 423 if (isResourceAvailable(SU)) 424 ResCount <<= FactorOne; 425 426 ResCount -= (regPressureDelta(SU) * ScaleTwo); 427 } 428 429 // These are platform-specific things. 430 // Will need to go into the back end 431 // and accessed from here via a hook. 432 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { 433 if (N->isMachineOpcode()) { 434 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 435 if (TID.isCall()) 436 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); 437 } 438 else 439 switch (N->getOpcode()) { 440 default: break; 441 case ISD::TokenFactor: 442 case ISD::CopyFromReg: 443 case ISD::CopyToReg: 444 ResCount += PriorityFour; 445 break; 446 447 case ISD::INLINEASM: 448 ResCount += PriorityThree; 449 break; 450 } 451 } 452 return ResCount; 453 } 454 455 456 /// Main resource tracking point. 457 void ResourcePriorityQueue::scheduledNode(SUnit *SU) { 458 // Use NULL entry as an event marker to reset 459 // the DFA state. 460 if (!SU) { 461 ResourcesModel->clearResources(); 462 Packet.clear(); 463 return; 464 } 465 466 const SDNode *ScegN = SU->getNode(); 467 // Update reg pressure tracking. 468 // First update current node. 469 if (ScegN->isMachineOpcode()) { 470 // Estimate generated regs. 471 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { 472 MVT VT = ScegN->getSimpleValueType(i); 473 474 if (TLI->isTypeLegal(VT)) { 475 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 476 if (RC) 477 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); 478 } 479 } 480 // Estimate killed regs. 481 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { 482 const SDValue &Op = ScegN->getOperand(i); 483 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); 484 485 if (TLI->isTypeLegal(VT)) { 486 const TargetRegisterClass *RC = TLI->getRegClassFor(VT); 487 if (RC) { 488 if (RegPressure[RC->getID()] > 489 (numberRCValPredInSU(SU, RC->getID()))) 490 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); 491 else RegPressure[RC->getID()] = 0; 492 } 493 } 494 } 495 for (SDep &Pred : SU->Preds) { 496 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) 497 continue; 498 --Pred.getSUnit()->NumRegDefsLeft; 499 } 500 } 501 502 // Reserve resources for this SU. 503 reserveResources(SU); 504 505 // Adjust number of parallel live ranges. 506 // Heuristic is simple - node with no data successors reduces 507 // number of live ranges. All others, increase it. 508 unsigned NumberNonControlDeps = 0; 509 510 for (const SDep &Succ : SU->Succs) { 511 adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); 512 if (!Succ.isCtrl()) 513 NumberNonControlDeps++; 514 } 515 516 if (!NumberNonControlDeps) { 517 if (ParallelLiveRanges >= SU->NumPreds) 518 ParallelLiveRanges -= SU->NumPreds; 519 else 520 ParallelLiveRanges = 0; 521 522 } 523 else 524 ParallelLiveRanges += SU->NumRegDefsLeft; 525 526 // Track parallel live chains. 527 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); 528 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); 529 } 530 531 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { 532 unsigned NodeNumDefs = 0; 533 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) 534 if (N->isMachineOpcode()) { 535 const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); 536 // No register need be allocated for this. 537 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { 538 NodeNumDefs = 0; 539 break; 540 } 541 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); 542 } 543 else 544 switch(N->getOpcode()) { 545 default: break; 546 case ISD::CopyFromReg: 547 NodeNumDefs++; 548 break; 549 case ISD::INLINEASM: 550 NodeNumDefs++; 551 break; 552 } 553 554 SU->NumRegDefsLeft = NodeNumDefs; 555 } 556 557 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just 558 /// scheduled. If SU is not itself available, then there is at least one 559 /// predecessor node that has not been scheduled yet. If SU has exactly ONE 560 /// unscheduled predecessor, we want to increase its priority: it getting 561 /// scheduled will make this node available, so it is better than some other 562 /// node of the same priority that will not make a node available. 563 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { 564 if (SU->isAvailable) return; // All preds scheduled. 565 566 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); 567 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) 568 return; 569 570 // Okay, we found a single predecessor that is available, but not scheduled. 571 // Since it is available, it must be in the priority queue. First remove it. 572 remove(OnlyAvailablePred); 573 574 // Reinsert the node into the priority queue, which recomputes its 575 // NumNodesSolelyBlocking value. 576 push(OnlyAvailablePred); 577 } 578 579 580 /// Main access point - returns next instructions 581 /// to be placed in scheduling sequence. 582 SUnit *ResourcePriorityQueue::pop() { 583 if (empty()) 584 return nullptr; 585 586 std::vector<SUnit *>::iterator Best = Queue.begin(); 587 if (!DisableDFASched) { 588 int BestCost = SUSchedulingCost(*Best); 589 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { 590 591 if (SUSchedulingCost(*I) > BestCost) { 592 BestCost = SUSchedulingCost(*I); 593 Best = I; 594 } 595 } 596 } 597 // Use default TD scheduling mechanism. 598 else { 599 for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) 600 if (Picker(*Best, *I)) 601 Best = I; 602 } 603 604 SUnit *V = *Best; 605 if (Best != std::prev(Queue.end())) 606 std::swap(*Best, Queue.back()); 607 608 Queue.pop_back(); 609 610 return V; 611 } 612 613 614 void ResourcePriorityQueue::remove(SUnit *SU) { 615 assert(!Queue.empty() && "Queue is empty!"); 616 std::vector<SUnit *>::iterator I = find(Queue, SU); 617 if (I != std::prev(Queue.end())) 618 std::swap(*I, Queue.back()); 619 620 Queue.pop_back(); 621 } 622