1 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based 11 // register allocator for LLVM. This allocator works by constructing a PBQP 12 // problem representing the register allocation problem under consideration, 13 // solving this using a PBQP solver, and mapping the solution back to a 14 // register assignment. If any variables are selected for spilling then spill 15 // code is inserted and the process repeated. 16 // 17 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned 18 // for register allocation. For more information on PBQP for register 19 // allocation, see the following papers: 20 // 21 // (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with 22 // PBQP. In Proceedings of the 7th Joint Modular Languages Conference 23 // (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361. 24 // 25 // (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular 26 // architectures. In Proceedings of the Joint Conference on Languages, 27 // Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York, 28 // NY, USA, 139-148. 29 // 30 //===----------------------------------------------------------------------===// 31 32 #include "llvm/CodeGen/RegAllocPBQP.h" 33 #include "RegisterCoalescer.h" 34 #include "Spiller.h" 35 #include "llvm/ADT/ArrayRef.h" 36 #include "llvm/ADT/BitVector.h" 37 #include "llvm/ADT/DenseMap.h" 38 #include "llvm/ADT/DenseSet.h" 39 #include "llvm/ADT/STLExtras.h" 40 #include "llvm/ADT/SmallPtrSet.h" 41 #include "llvm/ADT/SmallVector.h" 42 #include "llvm/ADT/StringRef.h" 43 #include "llvm/Analysis/AliasAnalysis.h" 44 #include "llvm/CodeGen/CalcSpillWeights.h" 45 #include "llvm/CodeGen/LiveInterval.h" 46 #include "llvm/CodeGen/LiveIntervals.h" 47 #include "llvm/CodeGen/LiveRangeEdit.h" 48 #include "llvm/CodeGen/LiveStacks.h" 49 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 50 #include "llvm/CodeGen/MachineDominators.h" 51 #include "llvm/CodeGen/MachineFunction.h" 52 #include "llvm/CodeGen/MachineFunctionPass.h" 53 #include "llvm/CodeGen/MachineInstr.h" 54 #include "llvm/CodeGen/MachineLoopInfo.h" 55 #include "llvm/CodeGen/MachineRegisterInfo.h" 56 #include "llvm/CodeGen/PBQP/Graph.h" 57 #include "llvm/CodeGen/PBQP/Math.h" 58 #include "llvm/CodeGen/PBQP/Solution.h" 59 #include "llvm/CodeGen/PBQPRAConstraint.h" 60 #include "llvm/CodeGen/RegAllocRegistry.h" 61 #include "llvm/CodeGen/SlotIndexes.h" 62 #include "llvm/CodeGen/TargetRegisterInfo.h" 63 #include "llvm/CodeGen/TargetSubtargetInfo.h" 64 #include "llvm/CodeGen/VirtRegMap.h" 65 #include "llvm/Config/llvm-config.h" 66 #include "llvm/IR/Function.h" 67 #include "llvm/IR/Module.h" 68 #include "llvm/MC/MCRegisterInfo.h" 69 #include "llvm/Pass.h" 70 #include "llvm/Support/CommandLine.h" 71 #include "llvm/Support/Compiler.h" 72 #include "llvm/Support/Debug.h" 73 #include "llvm/Support/FileSystem.h" 74 #include "llvm/Support/Printable.h" 75 #include "llvm/Support/raw_ostream.h" 76 #include <algorithm> 77 #include <cassert> 78 #include <cstddef> 79 #include <limits> 80 #include <map> 81 #include <memory> 82 #include <queue> 83 #include <set> 84 #include <sstream> 85 #include <string> 86 #include <system_error> 87 #include <tuple> 88 #include <utility> 89 #include <vector> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "regalloc" 94 95 static RegisterRegAlloc 96 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", 97 createDefaultPBQPRegisterAllocator); 98 99 static cl::opt<bool> 100 PBQPCoalescing("pbqp-coalescing", 101 cl::desc("Attempt coalescing during PBQP register allocation."), 102 cl::init(false), cl::Hidden); 103 104 #ifndef NDEBUG 105 static cl::opt<bool> 106 PBQPDumpGraphs("pbqp-dump-graphs", 107 cl::desc("Dump graphs for each function/round in the compilation unit."), 108 cl::init(false), cl::Hidden); 109 #endif 110 111 namespace { 112 113 /// 114 /// PBQP based allocators solve the register allocation problem by mapping 115 /// register allocation problems to Partitioned Boolean Quadratic 116 /// Programming problems. 117 class RegAllocPBQP : public MachineFunctionPass { 118 public: 119 static char ID; 120 121 /// Construct a PBQP register allocator. 122 RegAllocPBQP(char *cPassID = nullptr) 123 : MachineFunctionPass(ID), customPassID(cPassID) { 124 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 125 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 126 initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 127 initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 128 } 129 130 /// Return the pass name. 131 StringRef getPassName() const override { return "PBQP Register Allocator"; } 132 133 /// PBQP analysis usage. 134 void getAnalysisUsage(AnalysisUsage &au) const override; 135 136 /// Perform register allocation 137 bool runOnMachineFunction(MachineFunction &MF) override; 138 139 MachineFunctionProperties getRequiredProperties() const override { 140 return MachineFunctionProperties().set( 141 MachineFunctionProperties::Property::NoPHIs); 142 } 143 144 private: 145 using LI2NodeMap = std::map<const LiveInterval *, unsigned>; 146 using Node2LIMap = std::vector<const LiveInterval *>; 147 using AllowedSet = std::vector<unsigned>; 148 using AllowedSetMap = std::vector<AllowedSet>; 149 using RegPair = std::pair<unsigned, unsigned>; 150 using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>; 151 using RegSet = std::set<unsigned>; 152 153 char *customPassID; 154 155 RegSet VRegsToAlloc, EmptyIntervalVRegs; 156 157 /// Inst which is a def of an original reg and whose defs are already all 158 /// dead after remat is saved in DeadRemats. The deletion of such inst is 159 /// postponed till all the allocations are done, so its remat expr is 160 /// always available for the remat of all the siblings of the original reg. 161 SmallPtrSet<MachineInstr *, 32> DeadRemats; 162 163 /// Finds the initial set of vreg intervals to allocate. 164 void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); 165 166 /// Constructs an initial graph. 167 void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); 168 169 /// Spill the given VReg. 170 void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, 171 MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, 172 Spiller &VRegSpiller); 173 174 /// Given a solved PBQP problem maps this solution back to a register 175 /// assignment. 176 bool mapPBQPToRegAlloc(const PBQPRAGraph &G, 177 const PBQP::Solution &Solution, 178 VirtRegMap &VRM, 179 Spiller &VRegSpiller); 180 181 /// Postprocessing before final spilling. Sets basic block "live in" 182 /// variables. 183 void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, 184 VirtRegMap &VRM) const; 185 186 void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS); 187 }; 188 189 char RegAllocPBQP::ID = 0; 190 191 /// Set spill costs for each node in the PBQP reg-alloc graph. 192 class SpillCosts : public PBQPRAConstraint { 193 public: 194 void apply(PBQPRAGraph &G) override { 195 LiveIntervals &LIS = G.getMetadata().LIS; 196 197 // A minimum spill costs, so that register constraints can can be set 198 // without normalization in the [0.0:MinSpillCost( interval. 199 const PBQP::PBQPNum MinSpillCost = 10.0; 200 201 for (auto NId : G.nodeIds()) { 202 PBQP::PBQPNum SpillCost = 203 LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; 204 if (SpillCost == 0.0) 205 SpillCost = std::numeric_limits<PBQP::PBQPNum>::min(); 206 else 207 SpillCost += MinSpillCost; 208 PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId)); 209 NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost; 210 G.setNodeCosts(NId, std::move(NodeCosts)); 211 } 212 } 213 }; 214 215 /// Add interference edges between overlapping vregs. 216 class Interference : public PBQPRAConstraint { 217 private: 218 using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; 219 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>; 220 using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>; 221 using DisjointAllowedRegsCache = DenseSet<IKey>; 222 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>; 223 using IEdgeCache = DenseSet<IEdgeKey>; 224 225 bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, 226 PBQPRAGraph::NodeId MId, 227 const DisjointAllowedRegsCache &D) const { 228 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); 229 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); 230 231 if (NRegs == MRegs) 232 return false; 233 234 if (NRegs < MRegs) 235 return D.count(IKey(NRegs, MRegs)) > 0; 236 237 return D.count(IKey(MRegs, NRegs)) > 0; 238 } 239 240 void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, 241 PBQPRAGraph::NodeId MId, 242 DisjointAllowedRegsCache &D) { 243 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); 244 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); 245 246 assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); 247 248 if (NRegs < MRegs) 249 D.insert(IKey(NRegs, MRegs)); 250 else 251 D.insert(IKey(MRegs, NRegs)); 252 } 253 254 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required 255 // for the fast interference graph construction algorithm. The last is there 256 // to save us from looking up node ids via the VRegToNode map in the graph 257 // metadata. 258 using IntervalInfo = 259 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>; 260 261 static SlotIndex getStartPoint(const IntervalInfo &I) { 262 return std::get<0>(I)->segments[std::get<1>(I)].start; 263 } 264 265 static SlotIndex getEndPoint(const IntervalInfo &I) { 266 return std::get<0>(I)->segments[std::get<1>(I)].end; 267 } 268 269 static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) { 270 return std::get<2>(I); 271 } 272 273 static bool lowestStartPoint(const IntervalInfo &I1, 274 const IntervalInfo &I2) { 275 // Condition reversed because priority queue has the *highest* element at 276 // the front, rather than the lowest. 277 return getStartPoint(I1) > getStartPoint(I2); 278 } 279 280 static bool lowestEndPoint(const IntervalInfo &I1, 281 const IntervalInfo &I2) { 282 SlotIndex E1 = getEndPoint(I1); 283 SlotIndex E2 = getEndPoint(I2); 284 285 if (E1 < E2) 286 return true; 287 288 if (E1 > E2) 289 return false; 290 291 // If two intervals end at the same point, we need a way to break the tie or 292 // the set will assume they're actually equal and refuse to insert a 293 // "duplicate". Just compare the vregs - fast and guaranteed unique. 294 return std::get<0>(I1)->reg < std::get<0>(I2)->reg; 295 } 296 297 static bool isAtLastSegment(const IntervalInfo &I) { 298 return std::get<1>(I) == std::get<0>(I)->size() - 1; 299 } 300 301 static IntervalInfo nextSegment(const IntervalInfo &I) { 302 return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I)); 303 } 304 305 public: 306 void apply(PBQPRAGraph &G) override { 307 // The following is loosely based on the linear scan algorithm introduced in 308 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version 309 // isn't linear, because the size of the active set isn't bound by the 310 // number of registers, but rather the size of the largest clique in the 311 // graph. Still, we expect this to be better than N^2. 312 LiveIntervals &LIS = G.getMetadata().LIS; 313 314 // Interferenc matrices are incredibly regular - they're only a function of 315 // the allowed sets, so we cache them to avoid the overhead of constructing 316 // and uniquing them. 317 IMatrixCache C; 318 319 // Finding an edge is expensive in the worst case (O(max_clique(G))). So 320 // cache locally edges we have already seen. 321 IEdgeCache EC; 322 323 // Cache known disjoint allowed registers pairs 324 DisjointAllowedRegsCache D; 325 326 using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>; 327 using IntervalQueue = 328 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, 329 decltype(&lowestStartPoint)>; 330 IntervalSet Active(lowestEndPoint); 331 IntervalQueue Inactive(lowestStartPoint); 332 333 // Start by building the inactive set. 334 for (auto NId : G.nodeIds()) { 335 unsigned VReg = G.getNodeMetadata(NId).getVReg(); 336 LiveInterval &LI = LIS.getInterval(VReg); 337 assert(!LI.empty() && "PBQP graph contains node for empty interval"); 338 Inactive.push(std::make_tuple(&LI, 0, NId)); 339 } 340 341 while (!Inactive.empty()) { 342 // Tentatively grab the "next" interval - this choice may be overriden 343 // below. 344 IntervalInfo Cur = Inactive.top(); 345 346 // Retire any active intervals that end before Cur starts. 347 IntervalSet::iterator RetireItr = Active.begin(); 348 while (RetireItr != Active.end() && 349 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) { 350 // If this interval has subsequent segments, add the next one to the 351 // inactive list. 352 if (!isAtLastSegment(*RetireItr)) 353 Inactive.push(nextSegment(*RetireItr)); 354 355 ++RetireItr; 356 } 357 Active.erase(Active.begin(), RetireItr); 358 359 // One of the newly retired segments may actually start before the 360 // Cur segment, so re-grab the front of the inactive list. 361 Cur = Inactive.top(); 362 Inactive.pop(); 363 364 // At this point we know that Cur overlaps all active intervals. Add the 365 // interference edges. 366 PBQP::GraphBase::NodeId NId = getNodeId(Cur); 367 for (const auto &A : Active) { 368 PBQP::GraphBase::NodeId MId = getNodeId(A); 369 370 // Do not add an edge when the nodes' allowed registers do not 371 // intersect: there is obviously no interference. 372 if (haveDisjointAllowedRegs(G, NId, MId, D)) 373 continue; 374 375 // Check that we haven't already added this edge 376 IEdgeKey EK(std::min(NId, MId), std::max(NId, MId)); 377 if (EC.count(EK)) 378 continue; 379 380 // This is a new edge - add it to the graph. 381 if (!createInterferenceEdge(G, NId, MId, C)) 382 setDisjointAllowedRegs(G, NId, MId, D); 383 else 384 EC.insert(EK); 385 } 386 387 // Finally, add Cur to the Active set. 388 Active.insert(Cur); 389 } 390 } 391 392 private: 393 // Create an Interference edge and add it to the graph, unless it is 394 // a null matrix, meaning the nodes' allowed registers do not have any 395 // interference. This case occurs frequently between integer and floating 396 // point registers for example. 397 // return true iff both nodes interferes. 398 bool createInterferenceEdge(PBQPRAGraph &G, 399 PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, 400 IMatrixCache &C) { 401 const TargetRegisterInfo &TRI = 402 *G.getMetadata().MF.getSubtarget().getRegisterInfo(); 403 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); 404 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); 405 406 // Try looking the edge costs up in the IMatrixCache first. 407 IKey K(&NRegs, &MRegs); 408 IMatrixCache::iterator I = C.find(K); 409 if (I != C.end()) { 410 G.addEdgeBypassingCostAllocator(NId, MId, I->second); 411 return true; 412 } 413 414 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); 415 bool NodesInterfere = false; 416 for (unsigned I = 0; I != NRegs.size(); ++I) { 417 unsigned PRegN = NRegs[I]; 418 for (unsigned J = 0; J != MRegs.size(); ++J) { 419 unsigned PRegM = MRegs[J]; 420 if (TRI.regsOverlap(PRegN, PRegM)) { 421 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); 422 NodesInterfere = true; 423 } 424 } 425 } 426 427 if (!NodesInterfere) 428 return false; 429 430 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); 431 C[K] = G.getEdgeCostsPtr(EId); 432 433 return true; 434 } 435 }; 436 437 class Coalescing : public PBQPRAConstraint { 438 public: 439 void apply(PBQPRAGraph &G) override { 440 MachineFunction &MF = G.getMetadata().MF; 441 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; 442 CoalescerPair CP(*MF.getSubtarget().getRegisterInfo()); 443 444 // Scan the machine function and add a coalescing cost whenever CoalescerPair 445 // gives the Ok. 446 for (const auto &MBB : MF) { 447 for (const auto &MI : MBB) { 448 // Skip not-coalescable or already coalesced copies. 449 if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) 450 continue; 451 452 unsigned DstReg = CP.getDstReg(); 453 unsigned SrcReg = CP.getSrcReg(); 454 455 const float Scale = 1.0f / MBFI.getEntryFreq(); 456 PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; 457 458 if (CP.isPhys()) { 459 if (!MF.getRegInfo().isAllocatable(DstReg)) 460 continue; 461 462 PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg); 463 464 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = 465 G.getNodeMetadata(NId).getAllowedRegs(); 466 467 unsigned PRegOpt = 0; 468 while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) 469 ++PRegOpt; 470 471 if (PRegOpt < Allowed.size()) { 472 PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); 473 NewCosts[PRegOpt + 1] -= CBenefit; 474 G.setNodeCosts(NId, std::move(NewCosts)); 475 } 476 } else { 477 PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); 478 PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); 479 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = 480 &G.getNodeMetadata(N1Id).getAllowedRegs(); 481 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = 482 &G.getNodeMetadata(N2Id).getAllowedRegs(); 483 484 PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); 485 if (EId == G.invalidEdgeId()) { 486 PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, 487 Allowed2->size() + 1, 0); 488 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); 489 G.addEdge(N1Id, N2Id, std::move(Costs)); 490 } else { 491 if (G.getEdgeNode1Id(EId) == N2Id) { 492 std::swap(N1Id, N2Id); 493 std::swap(Allowed1, Allowed2); 494 } 495 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); 496 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); 497 G.updateEdgeCosts(EId, std::move(Costs)); 498 } 499 } 500 } 501 } 502 } 503 504 private: 505 void addVirtRegCoalesce( 506 PBQPRAGraph::RawMatrix &CostMat, 507 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, 508 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, 509 PBQP::PBQPNum Benefit) { 510 assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); 511 assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); 512 for (unsigned I = 0; I != Allowed1.size(); ++I) { 513 unsigned PReg1 = Allowed1[I]; 514 for (unsigned J = 0; J != Allowed2.size(); ++J) { 515 unsigned PReg2 = Allowed2[J]; 516 if (PReg1 == PReg2) 517 CostMat[I + 1][J + 1] -= Benefit; 518 } 519 } 520 } 521 }; 522 523 } // end anonymous namespace 524 525 // Out-of-line destructor/anchor for PBQPRAConstraint. 526 PBQPRAConstraint::~PBQPRAConstraint() = default; 527 528 void PBQPRAConstraint::anchor() {} 529 530 void PBQPRAConstraintList::anchor() {} 531 532 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { 533 au.setPreservesCFG(); 534 au.addRequired<AAResultsWrapperPass>(); 535 au.addPreserved<AAResultsWrapperPass>(); 536 au.addRequired<SlotIndexes>(); 537 au.addPreserved<SlotIndexes>(); 538 au.addRequired<LiveIntervals>(); 539 au.addPreserved<LiveIntervals>(); 540 //au.addRequiredID(SplitCriticalEdgesID); 541 if (customPassID) 542 au.addRequiredID(*customPassID); 543 au.addRequired<LiveStacks>(); 544 au.addPreserved<LiveStacks>(); 545 au.addRequired<MachineBlockFrequencyInfo>(); 546 au.addPreserved<MachineBlockFrequencyInfo>(); 547 au.addRequired<MachineLoopInfo>(); 548 au.addPreserved<MachineLoopInfo>(); 549 au.addRequired<MachineDominatorTree>(); 550 au.addPreserved<MachineDominatorTree>(); 551 au.addRequired<VirtRegMap>(); 552 au.addPreserved<VirtRegMap>(); 553 MachineFunctionPass::getAnalysisUsage(au); 554 } 555 556 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, 557 LiveIntervals &LIS) { 558 const MachineRegisterInfo &MRI = MF.getRegInfo(); 559 560 // Iterate over all live ranges. 561 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 562 unsigned Reg = TargetRegisterInfo::index2VirtReg(I); 563 if (MRI.reg_nodbg_empty(Reg)) 564 continue; 565 VRegsToAlloc.insert(Reg); 566 } 567 } 568 569 static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, 570 const MachineFunction &MF) { 571 const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs(); 572 for (unsigned i = 0; CSR[i] != 0; ++i) 573 if (TRI.regsOverlap(reg, CSR[i])) 574 return true; 575 return false; 576 } 577 578 void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, 579 Spiller &VRegSpiller) { 580 MachineFunction &MF = G.getMetadata().MF; 581 582 LiveIntervals &LIS = G.getMetadata().LIS; 583 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); 584 const TargetRegisterInfo &TRI = 585 *G.getMetadata().MF.getSubtarget().getRegisterInfo(); 586 587 std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); 588 589 std::map<unsigned, std::vector<unsigned>> VRegAllowedMap; 590 591 while (!Worklist.empty()) { 592 unsigned VReg = Worklist.back(); 593 Worklist.pop_back(); 594 595 LiveInterval &VRegLI = LIS.getInterval(VReg); 596 597 // If this is an empty interval move it to the EmptyIntervalVRegs set then 598 // continue. 599 if (VRegLI.empty()) { 600 EmptyIntervalVRegs.insert(VRegLI.reg); 601 VRegsToAlloc.erase(VRegLI.reg); 602 continue; 603 } 604 605 const TargetRegisterClass *TRC = MRI.getRegClass(VReg); 606 607 // Record any overlaps with regmask operands. 608 BitVector RegMaskOverlaps; 609 LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); 610 611 // Compute an initial allowed set for the current vreg. 612 std::vector<unsigned> VRegAllowed; 613 ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); 614 for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { 615 unsigned PReg = RawPRegOrder[I]; 616 if (MRI.isReserved(PReg)) 617 continue; 618 619 // vregLI crosses a regmask operand that clobbers preg. 620 if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) 621 continue; 622 623 // vregLI overlaps fixed regunit interference. 624 bool Interference = false; 625 for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) { 626 if (VRegLI.overlaps(LIS.getRegUnit(*Units))) { 627 Interference = true; 628 break; 629 } 630 } 631 if (Interference) 632 continue; 633 634 // preg is usable for this virtual register. 635 VRegAllowed.push_back(PReg); 636 } 637 638 // Check for vregs that have no allowed registers. These should be 639 // pre-spilled and the new vregs added to the worklist. 640 if (VRegAllowed.empty()) { 641 SmallVector<unsigned, 8> NewVRegs; 642 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); 643 Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); 644 continue; 645 } else 646 VRegAllowedMap[VReg] = std::move(VRegAllowed); 647 } 648 649 for (auto &KV : VRegAllowedMap) { 650 auto VReg = KV.first; 651 652 // Move empty intervals to the EmptyIntervalVReg set. 653 if (LIS.getInterval(VReg).empty()) { 654 EmptyIntervalVRegs.insert(VReg); 655 VRegsToAlloc.erase(VReg); 656 continue; 657 } 658 659 auto &VRegAllowed = KV.second; 660 661 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); 662 663 // Tweak cost of callee saved registers, as using then force spilling and 664 // restoring them. This would only happen in the prologue / epilogue though. 665 for (unsigned i = 0; i != VRegAllowed.size(); ++i) 666 if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF)) 667 NodeCosts[1 + i] += 1.0; 668 669 PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts)); 670 G.getNodeMetadata(NId).setVReg(VReg); 671 G.getNodeMetadata(NId).setAllowedRegs( 672 G.getMetadata().getAllowedRegs(std::move(VRegAllowed))); 673 G.getMetadata().setNodeIdForVReg(VReg, NId); 674 } 675 } 676 677 void RegAllocPBQP::spillVReg(unsigned VReg, 678 SmallVectorImpl<unsigned> &NewIntervals, 679 MachineFunction &MF, LiveIntervals &LIS, 680 VirtRegMap &VRM, Spiller &VRegSpiller) { 681 VRegsToAlloc.erase(VReg); 682 LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, 683 nullptr, &DeadRemats); 684 VRegSpiller.spill(LRE); 685 686 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 687 (void)TRI; 688 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " 689 << LRE.getParent().weight << ", New vregs: "); 690 691 // Copy any newly inserted live intervals into the list of regs to 692 // allocate. 693 for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); 694 I != E; ++I) { 695 const LiveInterval &LI = LIS.getInterval(*I); 696 assert(!LI.empty() && "Empty spill range."); 697 LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); 698 VRegsToAlloc.insert(LI.reg); 699 } 700 701 LLVM_DEBUG(dbgs() << ")\n"); 702 } 703 704 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, 705 const PBQP::Solution &Solution, 706 VirtRegMap &VRM, 707 Spiller &VRegSpiller) { 708 MachineFunction &MF = G.getMetadata().MF; 709 LiveIntervals &LIS = G.getMetadata().LIS; 710 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 711 (void)TRI; 712 713 // Set to true if we have any spills 714 bool AnotherRoundNeeded = false; 715 716 // Clear the existing allocation. 717 VRM.clearAllVirt(); 718 719 // Iterate over the nodes mapping the PBQP solution to a register 720 // assignment. 721 for (auto NId : G.nodeIds()) { 722 unsigned VReg = G.getNodeMetadata(NId).getVReg(); 723 unsigned AllocOption = Solution.getSelection(NId); 724 725 if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { 726 unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; 727 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " 728 << TRI.getName(PReg) << "\n"); 729 assert(PReg != 0 && "Invalid preg selected."); 730 VRM.assignVirt2Phys(VReg, PReg); 731 } else { 732 // Spill VReg. If this introduces new intervals we'll need another round 733 // of allocation. 734 SmallVector<unsigned, 8> NewVRegs; 735 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); 736 AnotherRoundNeeded |= !NewVRegs.empty(); 737 } 738 } 739 740 return !AnotherRoundNeeded; 741 } 742 743 void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, 744 LiveIntervals &LIS, 745 VirtRegMap &VRM) const { 746 MachineRegisterInfo &MRI = MF.getRegInfo(); 747 748 // First allocate registers for the empty intervals. 749 for (RegSet::const_iterator 750 I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); 751 I != E; ++I) { 752 LiveInterval &LI = LIS.getInterval(*I); 753 754 unsigned PReg = MRI.getSimpleHint(LI.reg); 755 756 if (PReg == 0) { 757 const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); 758 const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF); 759 for (unsigned CandidateReg : RawPRegOrder) { 760 if (!VRM.getRegInfo().isReserved(CandidateReg)) { 761 PReg = CandidateReg; 762 break; 763 } 764 } 765 assert(PReg && 766 "No un-reserved physical registers in this register class"); 767 } 768 769 VRM.assignVirt2Phys(LI.reg, PReg); 770 } 771 } 772 773 void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { 774 VRegSpiller.postOptimization(); 775 /// Remove dead defs because of rematerialization. 776 for (auto DeadInst : DeadRemats) { 777 LIS.RemoveMachineInstrFromMaps(*DeadInst); 778 DeadInst->eraseFromParent(); 779 } 780 DeadRemats.clear(); 781 } 782 783 static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, 784 unsigned NumInstr) { 785 // All intervals have a spill weight that is mostly proportional to the number 786 // of uses, with uses in loops having a bigger weight. 787 return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); 788 } 789 790 bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { 791 LiveIntervals &LIS = getAnalysis<LiveIntervals>(); 792 MachineBlockFrequencyInfo &MBFI = 793 getAnalysis<MachineBlockFrequencyInfo>(); 794 795 VirtRegMap &VRM = getAnalysis<VirtRegMap>(); 796 797 calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(), 798 MBFI, normalizePBQPSpillWeight); 799 800 std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); 801 802 MF.getRegInfo().freezeReservedRegs(MF); 803 804 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); 805 806 // Allocator main loop: 807 // 808 // * Map current regalloc problem to a PBQP problem 809 // * Solve the PBQP problem 810 // * Map the solution back to a register allocation 811 // * Spill if necessary 812 // 813 // This process is continued till no more spills are generated. 814 815 // Find the vreg intervals in need of allocation. 816 findVRegIntervalsToAlloc(MF, LIS); 817 818 #ifndef NDEBUG 819 const Function &F = MF.getFunction(); 820 std::string FullyQualifiedName = 821 F.getParent()->getModuleIdentifier() + "." + F.getName().str(); 822 #endif 823 824 // If there are non-empty intervals allocate them using pbqp. 825 if (!VRegsToAlloc.empty()) { 826 const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); 827 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = 828 llvm::make_unique<PBQPRAConstraintList>(); 829 ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); 830 ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); 831 if (PBQPCoalescing) 832 ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); 833 ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); 834 835 bool PBQPAllocComplete = false; 836 unsigned Round = 0; 837 838 while (!PBQPAllocComplete) { 839 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); 840 841 PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); 842 initializeGraph(G, VRM, *VRegSpiller); 843 ConstraintsRoot->apply(G); 844 845 #ifndef NDEBUG 846 if (PBQPDumpGraphs) { 847 std::ostringstream RS; 848 RS << Round; 849 std::string GraphFileName = FullyQualifiedName + "." + RS.str() + 850 ".pbqpgraph"; 851 std::error_code EC; 852 raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); 853 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" 854 << GraphFileName << "\"\n"); 855 G.dump(OS); 856 } 857 #endif 858 859 PBQP::Solution Solution = PBQP::RegAlloc::solve(G); 860 PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); 861 ++Round; 862 } 863 } 864 865 // Finalise allocation, allocate empty ranges. 866 finalizeAlloc(MF, LIS, VRM); 867 postOptimization(*VRegSpiller, LIS); 868 VRegsToAlloc.clear(); 869 EmptyIntervalVRegs.clear(); 870 871 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); 872 873 return true; 874 } 875 876 /// Create Printable object for node and register info. 877 static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, 878 const PBQP::RegAlloc::PBQPRAGraph &G) { 879 return Printable([NId, &G](raw_ostream &OS) { 880 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); 881 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 882 unsigned VReg = G.getNodeMetadata(NId).getVReg(); 883 const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); 884 OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')'; 885 }); 886 } 887 888 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 889 LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { 890 for (auto NId : nodeIds()) { 891 const Vector &Costs = getNodeCosts(NId); 892 assert(Costs.getLength() != 0 && "Empty vector in graph."); 893 OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; 894 } 895 OS << '\n'; 896 897 for (auto EId : edgeIds()) { 898 NodeId N1Id = getEdgeNode1Id(EId); 899 NodeId N2Id = getEdgeNode2Id(EId); 900 assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); 901 const Matrix &M = getEdgeCosts(EId); 902 assert(M.getRows() != 0 && "No rows in matrix."); 903 assert(M.getCols() != 0 && "No cols in matrix."); 904 OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; 905 OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; 906 OS << M << '\n'; 907 } 908 } 909 910 LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { 911 dump(dbgs()); 912 } 913 #endif 914 915 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { 916 OS << "graph {\n"; 917 for (auto NId : nodeIds()) { 918 OS << " node" << NId << " [ label=\"" 919 << PrintNodeInfo(NId, *this) << "\\n" 920 << getNodeCosts(NId) << "\" ]\n"; 921 } 922 923 OS << " edge [ len=" << nodeIds().size() << " ]\n"; 924 for (auto EId : edgeIds()) { 925 OS << " node" << getEdgeNode1Id(EId) 926 << " -- node" << getEdgeNode2Id(EId) 927 << " [ label=\""; 928 const Matrix &EdgeCosts = getEdgeCosts(EId); 929 for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { 930 OS << EdgeCosts.getRowAsVector(i) << "\\n"; 931 } 932 OS << "\" ]\n"; 933 } 934 OS << "}\n"; 935 } 936 937 FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { 938 return new RegAllocPBQP(customPassID); 939 } 940 941 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { 942 return createPBQPRegisterAllocator(); 943 } 944