1 //===- HexagonGenInsert.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "BitTracker.h" 10 #include "HexagonBitTracker.h" 11 #include "HexagonInstrInfo.h" 12 #include "HexagonRegisterInfo.h" 13 #include "HexagonSubtarget.h" 14 #include "llvm/ADT/BitVector.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/GraphTraits.h" 17 #include "llvm/ADT/PostOrderIterator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineDominators.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/TargetRegisterInfo.h" 31 #include "llvm/IR/DebugLoc.h" 32 #include "llvm/InitializePasses.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/Timer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <algorithm> 40 #include <cassert> 41 #include <cstdint> 42 #include <iterator> 43 #include <utility> 44 #include <vector> 45 46 #define DEBUG_TYPE "hexinsert" 47 48 using namespace llvm; 49 50 static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), 51 cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); 52 // The distance cutoff is selected based on the precheckin-perf results: 53 // cutoffs 20, 25, 35, and 40 are worse than 30. 54 static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), 55 cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " 56 "generation.")); 57 58 // Limit the container sizes for extreme cases where we run out of memory. 59 static cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096), 60 cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); 61 static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024), 62 cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); 63 64 static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, 65 cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); 66 static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), 67 cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " 68 "generation")); 69 70 static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, 71 cl::ZeroOrMore); 72 static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, 73 cl::ZeroOrMore); 74 // Whether to construct constant values via "insert". Could eliminate constant 75 // extenders, but often not practical. 76 static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, 77 cl::ZeroOrMore); 78 79 // The preprocessor gets confused when the DEBUG macro is passed larger 80 // chunks of code. Use this function to detect debugging. 81 inline static bool isDebug() { 82 #ifndef NDEBUG 83 return DebugFlag && isCurrentDebugType(DEBUG_TYPE); 84 #else 85 return false; 86 #endif 87 } 88 89 namespace { 90 91 // Set of virtual registers, based on BitVector. 92 struct RegisterSet : private BitVector { 93 RegisterSet() = default; 94 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 95 RegisterSet(const RegisterSet &RS) = default; 96 RegisterSet &operator=(const RegisterSet &RS) = default; 97 98 using BitVector::clear; 99 100 unsigned find_first() const { 101 int First = BitVector::find_first(); 102 if (First < 0) 103 return 0; 104 return x2v(First); 105 } 106 107 unsigned find_next(unsigned Prev) const { 108 int Next = BitVector::find_next(v2x(Prev)); 109 if (Next < 0) 110 return 0; 111 return x2v(Next); 112 } 113 114 RegisterSet &insert(unsigned R) { 115 unsigned Idx = v2x(R); 116 ensure(Idx); 117 return static_cast<RegisterSet&>(BitVector::set(Idx)); 118 } 119 RegisterSet &remove(unsigned R) { 120 unsigned Idx = v2x(R); 121 if (Idx >= size()) 122 return *this; 123 return static_cast<RegisterSet&>(BitVector::reset(Idx)); 124 } 125 126 RegisterSet &insert(const RegisterSet &Rs) { 127 return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 128 } 129 RegisterSet &remove(const RegisterSet &Rs) { 130 return static_cast<RegisterSet&>(BitVector::reset(Rs)); 131 } 132 133 reference operator[](unsigned R) { 134 unsigned Idx = v2x(R); 135 ensure(Idx); 136 return BitVector::operator[](Idx); 137 } 138 bool operator[](unsigned R) const { 139 unsigned Idx = v2x(R); 140 assert(Idx < size()); 141 return BitVector::operator[](Idx); 142 } 143 bool has(unsigned R) const { 144 unsigned Idx = v2x(R); 145 if (Idx >= size()) 146 return false; 147 return BitVector::test(Idx); 148 } 149 150 bool empty() const { 151 return !BitVector::any(); 152 } 153 bool includes(const RegisterSet &Rs) const { 154 // A.BitVector::test(B) <=> A-B != {} 155 return !Rs.BitVector::test(*this); 156 } 157 bool intersects(const RegisterSet &Rs) const { 158 return BitVector::anyCommon(Rs); 159 } 160 161 private: 162 void ensure(unsigned Idx) { 163 if (size() <= Idx) 164 resize(std::max(Idx+1, 32U)); 165 } 166 167 static inline unsigned v2x(unsigned v) { 168 return Register::virtReg2Index(v); 169 } 170 171 static inline unsigned x2v(unsigned x) { 172 return Register::index2VirtReg(x); 173 } 174 }; 175 176 struct PrintRegSet { 177 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 178 : RS(S), TRI(RI) {} 179 180 friend raw_ostream &operator<< (raw_ostream &OS, 181 const PrintRegSet &P); 182 183 private: 184 const RegisterSet &RS; 185 const TargetRegisterInfo *TRI; 186 }; 187 188 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 189 OS << '{'; 190 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 191 OS << ' ' << printReg(R, P.TRI); 192 OS << " }"; 193 return OS; 194 } 195 196 // A convenience class to associate unsigned numbers (such as virtual 197 // registers) with unsigned numbers. 198 struct UnsignedMap : public DenseMap<unsigned,unsigned> { 199 UnsignedMap() = default; 200 201 private: 202 using BaseType = DenseMap<unsigned, unsigned>; 203 }; 204 205 // A utility to establish an ordering between virtual registers: 206 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] 207 // This is meant as a cache for the ordering of virtual registers defined 208 // by a potentially expensive comparison function, or obtained by a proce- 209 // dure that should not be repeated each time two registers are compared. 210 struct RegisterOrdering : public UnsignedMap { 211 RegisterOrdering() = default; 212 213 unsigned operator[](unsigned VR) const { 214 const_iterator F = find(VR); 215 assert(F != end()); 216 return F->second; 217 } 218 219 // Add operator(), so that objects of this class can be used as 220 // comparators in std::sort et al. 221 bool operator() (unsigned VR1, unsigned VR2) const { 222 return operator[](VR1) < operator[](VR2); 223 } 224 }; 225 226 // Ordering of bit values. This class does not have operator[], but 227 // is supplies a comparison operator() for use in std:: algorithms. 228 // The order is as follows: 229 // - 0 < 1 < ref 230 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), 231 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. 232 struct BitValueOrdering { 233 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} 234 235 bool operator() (const BitTracker::BitValue &V1, 236 const BitTracker::BitValue &V2) const; 237 238 const RegisterOrdering &BaseOrd; 239 }; 240 241 } // end anonymous namespace 242 243 bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, 244 const BitTracker::BitValue &V2) const { 245 if (V1 == V2) 246 return false; 247 // V1==0 => true, V2==0 => false 248 if (V1.is(0) || V2.is(0)) 249 return V1.is(0); 250 // Neither of V1,V2 is 0, and V1!=V2. 251 // V2==1 => false, V1==1 => true 252 if (V2.is(1) || V1.is(1)) 253 return !V2.is(1); 254 // Both V1,V2 are refs. 255 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; 256 if (Ind1 != Ind2) 257 return Ind1 < Ind2; 258 // If V1.Pos==V2.Pos 259 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); 260 return V1.RefI.Pos < V2.RefI.Pos; 261 } 262 263 namespace { 264 265 // Cache for the BitTracker's cell map. Map lookup has a logarithmic 266 // complexity, this class will memoize the lookup results to reduce 267 // the access time for repeated lookups of the same cell. 268 struct CellMapShadow { 269 CellMapShadow(const BitTracker &T) : BT(T) {} 270 271 const BitTracker::RegisterCell &lookup(unsigned VR) { 272 unsigned RInd = Register::virtReg2Index(VR); 273 // Grow the vector to at least 32 elements. 274 if (RInd >= CVect.size()) 275 CVect.resize(std::max(RInd+16, 32U), nullptr); 276 const BitTracker::RegisterCell *CP = CVect[RInd]; 277 if (CP == nullptr) 278 CP = CVect[RInd] = &BT.lookup(VR); 279 return *CP; 280 } 281 282 const BitTracker &BT; 283 284 private: 285 using CellVectType = std::vector<const BitTracker::RegisterCell *>; 286 287 CellVectType CVect; 288 }; 289 290 // Comparator class for lexicographic ordering of virtual registers 291 // according to the corresponding BitTracker::RegisterCell objects. 292 struct RegisterCellLexCompare { 293 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) 294 : BitOrd(BO), CM(M) {} 295 296 bool operator() (unsigned VR1, unsigned VR2) const; 297 298 private: 299 const BitValueOrdering &BitOrd; 300 CellMapShadow &CM; 301 }; 302 303 // Comparator class for lexicographic ordering of virtual registers 304 // according to the specified bits of the corresponding BitTracker:: 305 // RegisterCell objects. 306 // Specifically, this class will be used to compare bit B of a register 307 // cell for a selected virtual register R with bit N of any register 308 // other than R. 309 struct RegisterCellBitCompareSel { 310 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, 311 const BitValueOrdering &BO, CellMapShadow &M) 312 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} 313 314 bool operator() (unsigned VR1, unsigned VR2) const; 315 316 private: 317 const unsigned SelR, SelB; 318 const unsigned BitN; 319 const BitValueOrdering &BitOrd; 320 CellMapShadow &CM; 321 }; 322 323 } // end anonymous namespace 324 325 bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { 326 // Ordering of registers, made up from two given orderings: 327 // - the ordering of the register numbers, and 328 // - the ordering of register cells. 329 // Def. R1 < R2 if: 330 // - cell(R1) < cell(R2), or 331 // - cell(R1) == cell(R2), and index(R1) < index(R2). 332 // 333 // For register cells, the ordering is lexicographic, with index 0 being 334 // the most significant. 335 if (VR1 == VR2) 336 return false; 337 338 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); 339 uint16_t W1 = RC1.width(), W2 = RC2.width(); 340 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { 341 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; 342 if (V1 != V2) 343 return BitOrd(V1, V2); 344 } 345 // Cells are equal up until the common length. 346 if (W1 != W2) 347 return W1 < W2; 348 349 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; 350 } 351 352 bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { 353 if (VR1 == VR2) 354 return false; 355 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); 356 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); 357 uint16_t W1 = RC1.width(), W2 = RC2.width(); 358 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; 359 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; 360 // If Bit1 exceeds the width of VR1, then: 361 // - return false, if at the same time Bit2 exceeds VR2, or 362 // - return true, otherwise. 363 // (I.e. "a bit value that does not exist is less than any bit value 364 // that does exist".) 365 if (W1 <= Bit1) 366 return Bit2 < W2; 367 // If Bit1 is within VR1, but Bit2 is not within VR2, return false. 368 if (W2 <= Bit2) 369 return false; 370 371 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; 372 if (V1 != V2) 373 return BitOrd(V1, V2); 374 return false; 375 } 376 377 namespace { 378 379 class OrderedRegisterList { 380 using ListType = std::vector<unsigned>; 381 const unsigned MaxSize; 382 383 public: 384 OrderedRegisterList(const RegisterOrdering &RO) 385 : MaxSize(MaxORLSize), Ord(RO) {} 386 387 void insert(unsigned VR); 388 void remove(unsigned VR); 389 390 unsigned operator[](unsigned Idx) const { 391 assert(Idx < Seq.size()); 392 return Seq[Idx]; 393 } 394 395 unsigned size() const { 396 return Seq.size(); 397 } 398 399 using iterator = ListType::iterator; 400 using const_iterator = ListType::const_iterator; 401 402 iterator begin() { return Seq.begin(); } 403 iterator end() { return Seq.end(); } 404 const_iterator begin() const { return Seq.begin(); } 405 const_iterator end() const { return Seq.end(); } 406 407 // Convenience function to convert an iterator to the corresponding index. 408 unsigned idx(iterator It) const { return It-begin(); } 409 410 private: 411 ListType Seq; 412 const RegisterOrdering &Ord; 413 }; 414 415 struct PrintORL { 416 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) 417 : RL(L), TRI(RI) {} 418 419 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); 420 421 private: 422 const OrderedRegisterList &RL; 423 const TargetRegisterInfo *TRI; 424 }; 425 426 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { 427 OS << '('; 428 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); 429 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { 430 if (I != B) 431 OS << ", "; 432 OS << printReg(*I, P.TRI); 433 } 434 OS << ')'; 435 return OS; 436 } 437 438 } // end anonymous namespace 439 440 void OrderedRegisterList::insert(unsigned VR) { 441 iterator L = llvm::lower_bound(Seq, VR, Ord); 442 if (L == Seq.end()) 443 Seq.push_back(VR); 444 else 445 Seq.insert(L, VR); 446 447 unsigned S = Seq.size(); 448 if (S > MaxSize) 449 Seq.resize(MaxSize); 450 assert(Seq.size() <= MaxSize); 451 } 452 453 void OrderedRegisterList::remove(unsigned VR) { 454 iterator L = llvm::lower_bound(Seq, VR, Ord); 455 if (L != Seq.end()) 456 Seq.erase(L); 457 } 458 459 namespace { 460 461 // A record of the insert form. The fields correspond to the operands 462 // of the "insert" instruction: 463 // ... = insert(SrcR, InsR, #Wdh, #Off) 464 struct IFRecord { 465 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) 466 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} 467 468 unsigned SrcR, InsR; 469 uint16_t Wdh, Off; 470 }; 471 472 struct PrintIFR { 473 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) 474 : IFR(R), TRI(RI) {} 475 476 private: 477 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); 478 479 const IFRecord &IFR; 480 const TargetRegisterInfo *TRI; 481 }; 482 483 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { 484 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; 485 OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI) 486 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; 487 return OS; 488 } 489 490 using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>; 491 492 } // end anonymous namespace 493 494 namespace llvm { 495 496 void initializeHexagonGenInsertPass(PassRegistry&); 497 FunctionPass *createHexagonGenInsert(); 498 499 } // end namespace llvm 500 501 namespace { 502 503 class HexagonGenInsert : public MachineFunctionPass { 504 public: 505 static char ID; 506 507 HexagonGenInsert() : MachineFunctionPass(ID) { 508 initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); 509 } 510 511 StringRef getPassName() const override { 512 return "Hexagon generate \"insert\" instructions"; 513 } 514 515 void getAnalysisUsage(AnalysisUsage &AU) const override { 516 AU.addRequired<MachineDominatorTree>(); 517 AU.addPreserved<MachineDominatorTree>(); 518 MachineFunctionPass::getAnalysisUsage(AU); 519 } 520 521 bool runOnMachineFunction(MachineFunction &MF) override; 522 523 private: 524 using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>; 525 526 void buildOrderingMF(RegisterOrdering &RO) const; 527 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; 528 bool isIntClass(const TargetRegisterClass *RC) const; 529 bool isConstant(unsigned VR) const; 530 bool isSmallConstant(unsigned VR) const; 531 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, 532 uint16_t L, uint16_t S) const; 533 bool findSelfReference(unsigned VR) const; 534 bool findNonSelfReference(unsigned VR) const; 535 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; 536 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; 537 unsigned distance(const MachineBasicBlock *FromB, 538 const MachineBasicBlock *ToB, const UnsignedMap &RPO, 539 PairMapType &M) const; 540 unsigned distance(MachineBasicBlock::const_iterator FromI, 541 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 542 PairMapType &M) const; 543 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); 544 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); 545 void findRemovableRegisters(unsigned VR, IFRecord IF, 546 RegisterSet &RMs) const; 547 void computeRemovableRegisters(); 548 549 void pruneEmptyLists(); 550 void pruneCoveredSets(unsigned VR); 551 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); 552 void pruneRegCopies(unsigned VR); 553 void pruneCandidates(); 554 void selectCandidates(); 555 bool generateInserts(); 556 557 bool removeDeadCode(MachineDomTreeNode *N); 558 559 // IFRecord coupled with a set of potentially removable registers: 560 using IFListType = std::vector<IFRecordWithRegSet>; 561 using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType 562 563 void dump_map() const; 564 565 const HexagonInstrInfo *HII = nullptr; 566 const HexagonRegisterInfo *HRI = nullptr; 567 568 MachineFunction *MFN; 569 MachineRegisterInfo *MRI; 570 MachineDominatorTree *MDT; 571 CellMapShadow *CMS; 572 573 RegisterOrdering BaseOrd; 574 RegisterOrdering CellOrd; 575 IFMapType IFMap; 576 }; 577 578 } // end anonymous namespace 579 580 char HexagonGenInsert::ID = 0; 581 582 void HexagonGenInsert::dump_map() const { 583 for (const auto &I : IFMap) { 584 dbgs() << " " << printReg(I.first, HRI) << ":\n"; 585 const IFListType &LL = I.second; 586 for (const auto &J : LL) 587 dbgs() << " " << PrintIFR(J.first, HRI) << ", " 588 << PrintRegSet(J.second, HRI) << '\n'; 589 } 590 } 591 592 void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { 593 unsigned Index = 0; 594 595 for (const MachineBasicBlock &B : *MFN) { 596 if (!CMS->BT.reached(&B)) 597 continue; 598 599 for (const MachineInstr &MI : B) { 600 for (const MachineOperand &MO : MI.operands()) { 601 if (MO.isReg() && MO.isDef()) { 602 Register R = MO.getReg(); 603 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); 604 if (R.isVirtual()) 605 RO.insert(std::make_pair(R, Index++)); 606 } 607 } 608 } 609 } 610 // Since some virtual registers may have had their def and uses eliminated, 611 // they are no longer referenced in the code, and so they will not appear 612 // in the map. 613 } 614 615 void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, 616 RegisterOrdering &RO) const { 617 // Create a vector of all virtual registers (collect them from the base 618 // ordering RB), and then sort it using the RegisterCell comparator. 619 BitValueOrdering BVO(RB); 620 RegisterCellLexCompare LexCmp(BVO, *CMS); 621 622 using SortableVectorType = std::vector<unsigned>; 623 624 SortableVectorType VRs; 625 for (auto &I : RB) 626 VRs.push_back(I.first); 627 llvm::sort(VRs, LexCmp); 628 // Transfer the results to the outgoing register ordering. 629 for (unsigned i = 0, n = VRs.size(); i < n; ++i) 630 RO.insert(std::make_pair(VRs[i], i)); 631 } 632 633 inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { 634 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; 635 } 636 637 bool HexagonGenInsert::isConstant(unsigned VR) const { 638 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 639 uint16_t W = RC.width(); 640 for (uint16_t i = 0; i < W; ++i) { 641 const BitTracker::BitValue &BV = RC[i]; 642 if (BV.is(0) || BV.is(1)) 643 continue; 644 return false; 645 } 646 return true; 647 } 648 649 bool HexagonGenInsert::isSmallConstant(unsigned VR) const { 650 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 651 uint16_t W = RC.width(); 652 if (W > 64) 653 return false; 654 uint64_t V = 0, B = 1; 655 for (uint16_t i = 0; i < W; ++i) { 656 const BitTracker::BitValue &BV = RC[i]; 657 if (BV.is(1)) 658 V |= B; 659 else if (!BV.is(0)) 660 return false; 661 B <<= 1; 662 } 663 664 // For 32-bit registers, consider: Rd = #s16. 665 if (W == 32) 666 return isInt<16>(V); 667 668 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) 669 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); 670 } 671 672 bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, 673 unsigned InsR, uint16_t L, uint16_t S) const { 674 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); 675 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); 676 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); 677 // Only integet (32-/64-bit) register classes. 678 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) 679 return false; 680 // The "source" register must be of the same class as DstR. 681 if (DstRC != SrcRC) 682 return false; 683 if (DstRC == InsRC) 684 return true; 685 // A 64-bit register can only be generated from other 64-bit registers. 686 if (DstRC == &Hexagon::DoubleRegsRegClass) 687 return false; 688 // Otherwise, the L and S cannot span 32-bit word boundary. 689 if (S < 32 && S+L > 32) 690 return false; 691 return true; 692 } 693 694 bool HexagonGenInsert::findSelfReference(unsigned VR) const { 695 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 696 for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 697 const BitTracker::BitValue &V = RC[i]; 698 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) 699 return true; 700 } 701 return false; 702 } 703 704 bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { 705 BitTracker::RegisterCell RC = CMS->lookup(VR); 706 for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 707 const BitTracker::BitValue &V = RC[i]; 708 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) 709 return true; 710 } 711 return false; 712 } 713 714 void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, 715 RegisterSet &Defs) const { 716 for (const MachineOperand &MO : MI->operands()) { 717 if (!MO.isReg() || !MO.isDef()) 718 continue; 719 Register R = MO.getReg(); 720 if (!R.isVirtual()) 721 continue; 722 Defs.insert(R); 723 } 724 } 725 726 void HexagonGenInsert::getInstrUses(const MachineInstr *MI, 727 RegisterSet &Uses) const { 728 for (const MachineOperand &MO : MI->operands()) { 729 if (!MO.isReg() || !MO.isUse()) 730 continue; 731 Register R = MO.getReg(); 732 if (!R.isVirtual()) 733 continue; 734 Uses.insert(R); 735 } 736 } 737 738 unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, 739 const MachineBasicBlock *ToB, const UnsignedMap &RPO, 740 PairMapType &M) const { 741 // Forward distance from the end of a block to the beginning of it does 742 // not make sense. This function should not be called with FromB == ToB. 743 assert(FromB != ToB); 744 745 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); 746 // If we have already computed it, return the cached result. 747 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); 748 if (F != M.end()) 749 return F->second; 750 unsigned ToRPO = RPO.lookup(ToN); 751 752 unsigned MaxD = 0; 753 754 for (const MachineBasicBlock *PB : ToB->predecessors()) { 755 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance 756 // along that path will be 0, and we don't need to do any calculations 757 // on it. 758 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) 759 continue; 760 unsigned D = PB->size() + distance(FromB, PB, RPO, M); 761 if (D > MaxD) 762 MaxD = D; 763 } 764 765 // Memoize the result for later lookup. 766 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); 767 return MaxD; 768 } 769 770 unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, 771 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 772 PairMapType &M) const { 773 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); 774 if (FB == TB) 775 return std::distance(FromI, ToI); 776 unsigned D1 = std::distance(TB->begin(), ToI); 777 unsigned D2 = distance(FB, TB, RPO, M); 778 unsigned D3 = std::distance(FromI, FB->end()); 779 return D1+D2+D3; 780 } 781 782 bool HexagonGenInsert::findRecordInsertForms(unsigned VR, 783 OrderedRegisterList &AVs) { 784 if (isDebug()) { 785 dbgs() << __func__ << ": " << printReg(VR, HRI) 786 << " AVs: " << PrintORL(AVs, HRI) << "\n"; 787 } 788 if (AVs.size() == 0) 789 return false; 790 791 using iterator = OrderedRegisterList::iterator; 792 793 BitValueOrdering BVO(BaseOrd); 794 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 795 uint16_t W = RC.width(); 796 797 using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift) 798 using RSListType = std::vector<RSRecord>; 799 // Have a map, with key being the matching prefix length, and the value 800 // being the list of pairs (R,S), where R's prefix matches VR at S. 801 // (DenseMap<uint16_t,RSListType> fails to instantiate.) 802 using LRSMapType = DenseMap<unsigned, RSListType>; 803 LRSMapType LM; 804 805 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, 806 // and find matching prefixes from AVs with the rotated RC. Such a prefix 807 // would match a string of bits (of length L) in RC starting at S. 808 for (uint16_t S = 0; S < W; ++S) { 809 iterator B = AVs.begin(), E = AVs.end(); 810 // The registers in AVs are ordered according to the lexical order of 811 // the corresponding register cells. This means that the range of regis- 812 // ters in AVs that match a prefix of length L+1 will be contained in 813 // the range that matches a prefix of length L. This means that we can 814 // keep narrowing the search space as the prefix length goes up. This 815 // helps reduce the overall complexity of the search. 816 uint16_t L; 817 for (L = 0; L < W-S; ++L) { 818 // Compare against VR's bits starting at S, which emulates rotation 819 // of VR by S. 820 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); 821 iterator NewB = std::lower_bound(B, E, VR, RCB); 822 iterator NewE = std::upper_bound(NewB, E, VR, RCB); 823 // For the registers that are eliminated from the next range, L is 824 // the longest prefix matching VR at position S (their prefixes 825 // differ from VR at S+L). If L>0, record this information for later 826 // use. 827 if (L > 0) { 828 for (iterator I = B; I != NewB; ++I) 829 LM[L].push_back(std::make_pair(*I, S)); 830 for (iterator I = NewE; I != E; ++I) 831 LM[L].push_back(std::make_pair(*I, S)); 832 } 833 B = NewB, E = NewE; 834 if (B == E) 835 break; 836 } 837 // Record the final register range. If this range is non-empty, then 838 // L=W-S. 839 assert(B == E || L == W-S); 840 if (B != E) { 841 for (iterator I = B; I != E; ++I) 842 LM[L].push_back(std::make_pair(*I, S)); 843 // If B!=E, then we found a range of registers whose prefixes cover the 844 // rest of VR from position S. There is no need to further advance S. 845 break; 846 } 847 } 848 849 if (isDebug()) { 850 dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n"; 851 for (const auto &I : LM) { 852 dbgs() << " L=" << I.first << ':'; 853 const RSListType &LL = I.second; 854 for (const auto &J : LL) 855 dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')'; 856 dbgs() << '\n'; 857 } 858 } 859 860 bool Recorded = false; 861 862 for (unsigned SrcR : AVs) { 863 int FDi = -1, LDi = -1; // First/last different bit. 864 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); 865 uint16_t AW = AC.width(); 866 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { 867 if (RC[i] == AC[i]) 868 continue; 869 if (FDi == -1) 870 FDi = i; 871 LDi = i; 872 } 873 if (FDi == -1) 874 continue; // TODO (future): Record identical registers. 875 // Look for a register whose prefix could patch the range [FD..LD] 876 // where VR and SrcR differ. 877 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. 878 uint16_t MinL = LD-FD+1; 879 for (uint16_t L = MinL; L < W; ++L) { 880 LRSMapType::iterator F = LM.find(L); 881 if (F == LM.end()) 882 continue; 883 RSListType &LL = F->second; 884 for (const auto &I : LL) { 885 uint16_t S = I.second; 886 // MinL is the minimum length of the prefix. Any length above MinL 887 // allows some flexibility as to where the prefix can start: 888 // given the extra length EL=L-MinL, the prefix must start between 889 // max(0,FD-EL) and FD. 890 if (S > FD) // Starts too late. 891 continue; 892 uint16_t EL = L-MinL; 893 uint16_t LowS = (EL < FD) ? FD-EL : 0; 894 if (S < LowS) // Starts too early. 895 continue; 896 unsigned InsR = I.first; 897 if (!isValidInsertForm(VR, SrcR, InsR, L, S)) 898 continue; 899 if (isDebug()) { 900 dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI) 901 << ',' << printReg(InsR, HRI) << ",#" << L << ",#" 902 << S << ")\n"; 903 } 904 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); 905 IFMap[VR].push_back(RR); 906 Recorded = true; 907 } 908 } 909 } 910 911 return Recorded; 912 } 913 914 void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, 915 OrderedRegisterList &AVs) { 916 if (isDebug()) 917 dbgs() << "visiting block " << printMBBReference(*B) << "\n"; 918 919 // First, check if this block is reachable at all. If not, the bit tracker 920 // will not have any information about registers in it. 921 if (!CMS->BT.reached(B)) 922 return; 923 924 bool DoConst = OptConst; 925 // Keep a separate set of registers defined in this block, so that we 926 // can remove them from the list of available registers once all DT 927 // successors have been processed. 928 RegisterSet BlockDefs, InsDefs; 929 for (MachineInstr &MI : *B) { 930 InsDefs.clear(); 931 getInstrDefs(&MI, InsDefs); 932 // Leave those alone. They are more transparent than "insert". 933 bool Skip = MI.isCopy() || MI.isRegSequence(); 934 935 if (!Skip) { 936 // Visit all defined registers, and attempt to find the corresponding 937 // "insert" representations. 938 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { 939 // Do not collect registers that are known to be compile-time cons- 940 // tants, unless requested. 941 if (!DoConst && isConstant(VR)) 942 continue; 943 // If VR's cell contains a reference to VR, then VR cannot be defined 944 // via "insert". If VR is a constant that can be generated in a single 945 // instruction (without constant extenders), generating it via insert 946 // makes no sense. 947 if (findSelfReference(VR) || isSmallConstant(VR)) 948 continue; 949 950 findRecordInsertForms(VR, AVs); 951 // Stop if the map size is too large. 952 if (IFMap.size() > MaxIFMSize) 953 return; 954 } 955 } 956 957 // Insert the defined registers into the list of available registers 958 // after they have been processed. 959 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) 960 AVs.insert(VR); 961 BlockDefs.insert(InsDefs); 962 } 963 964 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) { 965 MachineBasicBlock *SB = DTN->getBlock(); 966 collectInBlock(SB, AVs); 967 } 968 969 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) 970 AVs.remove(VR); 971 } 972 973 void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, 974 RegisterSet &RMs) const { 975 // For a given register VR and a insert form, find the registers that are 976 // used by the current definition of VR, and which would no longer be 977 // needed for it after the definition of VR is replaced with the insert 978 // form. These are the registers that could potentially become dead. 979 RegisterSet Regs[2]; 980 981 unsigned S = 0; // Register set selector. 982 Regs[S].insert(VR); 983 984 while (!Regs[S].empty()) { 985 // Breadth-first search. 986 unsigned OtherS = 1-S; 987 Regs[OtherS].clear(); 988 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { 989 Regs[S].remove(R); 990 if (R == IF.SrcR || R == IF.InsR) 991 continue; 992 // Check if a given register has bits that are references to any other 993 // registers. This is to detect situations where the instruction that 994 // defines register R takes register Q as an operand, but R itself does 995 // not contain any bits from Q. Loads are examples of how this could 996 // happen: 997 // R = load Q 998 // In this case (assuming we do not have any knowledge about the loaded 999 // value), we must not treat R as a "conveyance" of the bits from Q. 1000 // (The information in BT about R's bits would have them as constants, 1001 // in case of zero-extending loads, or refs to R.) 1002 if (!findNonSelfReference(R)) 1003 continue; 1004 RMs.insert(R); 1005 const MachineInstr *DefI = MRI->getVRegDef(R); 1006 assert(DefI); 1007 // Do not iterate past PHI nodes to avoid infinite loops. This can 1008 // make the final set a bit less accurate, but the removable register 1009 // sets are an approximation anyway. 1010 if (DefI->isPHI()) 1011 continue; 1012 getInstrUses(DefI, Regs[OtherS]); 1013 } 1014 S = OtherS; 1015 } 1016 // The register VR is added to the list as a side-effect of the algorithm, 1017 // but it is not "potentially removable". A potentially removable register 1018 // is one that may become unused (dead) after conversion to the insert form 1019 // IF, and obviously VR (or its replacement) will not become dead by apply- 1020 // ing IF. 1021 RMs.remove(VR); 1022 } 1023 1024 void HexagonGenInsert::computeRemovableRegisters() { 1025 for (auto &I : IFMap) { 1026 IFListType &LL = I.second; 1027 for (auto &J : LL) 1028 findRemovableRegisters(I.first, J.first, J.second); 1029 } 1030 } 1031 1032 void HexagonGenInsert::pruneEmptyLists() { 1033 // Remove all entries from the map, where the register has no insert forms 1034 // associated with it. 1035 using IterListType = SmallVector<IFMapType::iterator, 16>; 1036 IterListType Prune; 1037 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1038 if (I->second.empty()) 1039 Prune.push_back(I); 1040 } 1041 for (unsigned i = 0, n = Prune.size(); i < n; ++i) 1042 IFMap.erase(Prune[i]); 1043 } 1044 1045 void HexagonGenInsert::pruneCoveredSets(unsigned VR) { 1046 IFMapType::iterator F = IFMap.find(VR); 1047 assert(F != IFMap.end()); 1048 IFListType &LL = F->second; 1049 1050 // First, examine the IF candidates for register VR whose removable-regis- 1051 // ter sets are empty. This means that a given candidate will not help eli- 1052 // minate any registers, but since "insert" is not a constant-extendable 1053 // instruction, using such a candidate may reduce code size if the defini- 1054 // tion of VR is constant-extended. 1055 // If there exists a candidate with a non-empty set, the ones with empty 1056 // sets will not be used and can be removed. 1057 MachineInstr *DefVR = MRI->getVRegDef(VR); 1058 bool DefEx = HII->isConstExtended(*DefVR); 1059 bool HasNE = false; 1060 for (const auto &I : LL) { 1061 if (I.second.empty()) 1062 continue; 1063 HasNE = true; 1064 break; 1065 } 1066 if (!DefEx || HasNE) { 1067 // The definition of VR is not constant-extended, or there is a candidate 1068 // with a non-empty set. Remove all candidates with empty sets. 1069 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { 1070 return IR.second.empty(); 1071 }; 1072 llvm::erase_if(LL, IsEmpty); 1073 } else { 1074 // The definition of VR is constant-extended, and all candidates have 1075 // empty removable-register sets. Pick the maximum candidate, and remove 1076 // all others. The "maximum" does not have any special meaning here, it 1077 // is only so that the candidate that will remain on the list is selec- 1078 // ted deterministically. 1079 IFRecord MaxIF = LL[0].first; 1080 for (unsigned i = 1, n = LL.size(); i < n; ++i) { 1081 // If LL[MaxI] < LL[i], then MaxI = i. 1082 const IFRecord &IF = LL[i].first; 1083 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; 1084 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; 1085 if (M0 > R0) 1086 continue; 1087 if (M0 == R0) { 1088 if (M1 > R1) 1089 continue; 1090 if (M1 == R1) { 1091 if (MaxIF.Wdh > IF.Wdh) 1092 continue; 1093 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) 1094 continue; 1095 } 1096 } 1097 // MaxIF < IF. 1098 MaxIF = IF; 1099 } 1100 // Remove everything except the maximum candidate. All register sets 1101 // are empty, so no need to preserve anything. 1102 LL.clear(); 1103 LL.push_back(std::make_pair(MaxIF, RegisterSet())); 1104 } 1105 1106 // Now, remove those whose sets of potentially removable registers are 1107 // contained in another IF candidate for VR. For example, given these 1108 // candidates for %45, 1109 // %45: 1110 // (%44,%41,#9,#8), { %42 } 1111 // (%43,%41,#9,#8), { %42 %44 } 1112 // remove the first one, since it is contained in the second one. 1113 for (unsigned i = 0, n = LL.size(); i < n; ) { 1114 const RegisterSet &RMi = LL[i].second; 1115 unsigned j = 0; 1116 while (j < n) { 1117 if (j != i && LL[j].second.includes(RMi)) 1118 break; 1119 j++; 1120 } 1121 if (j == n) { // RMi not contained in anything else. 1122 i++; 1123 continue; 1124 } 1125 LL.erase(LL.begin()+i); 1126 n = LL.size(); 1127 } 1128 } 1129 1130 void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, 1131 PairMapType &M) { 1132 IFMapType::iterator F = IFMap.find(VR); 1133 assert(F != IFMap.end()); 1134 IFListType &LL = F->second; 1135 unsigned Cutoff = VRegDistCutoff; 1136 const MachineInstr *DefV = MRI->getVRegDef(VR); 1137 1138 for (unsigned i = LL.size(); i > 0; --i) { 1139 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; 1140 const MachineInstr *DefS = MRI->getVRegDef(SR); 1141 const MachineInstr *DefI = MRI->getVRegDef(IR); 1142 unsigned DSV = distance(DefS, DefV, RPO, M); 1143 if (DSV < Cutoff) { 1144 unsigned DIV = distance(DefI, DefV, RPO, M); 1145 if (DIV < Cutoff) 1146 continue; 1147 } 1148 LL.erase(LL.begin()+(i-1)); 1149 } 1150 } 1151 1152 void HexagonGenInsert::pruneRegCopies(unsigned VR) { 1153 IFMapType::iterator F = IFMap.find(VR); 1154 assert(F != IFMap.end()); 1155 IFListType &LL = F->second; 1156 1157 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { 1158 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); 1159 }; 1160 llvm::erase_if(LL, IsCopy); 1161 } 1162 1163 void HexagonGenInsert::pruneCandidates() { 1164 // Remove candidates that are not beneficial, regardless of the final 1165 // selection method. 1166 // First, remove candidates whose potentially removable set is a subset 1167 // of another candidate's set. 1168 for (const auto &I : IFMap) 1169 pruneCoveredSets(I.first); 1170 1171 UnsignedMap RPO; 1172 1173 using RPOTType = ReversePostOrderTraversal<const MachineFunction *>; 1174 1175 RPOTType RPOT(MFN); 1176 unsigned RPON = 0; 1177 for (const auto &I : RPOT) 1178 RPO[I->getNumber()] = RPON++; 1179 1180 PairMapType Memo; // Memoization map for distance calculation. 1181 // Remove candidates that would use registers defined too far away. 1182 for (const auto &I : IFMap) 1183 pruneUsesTooFar(I.first, RPO, Memo); 1184 1185 pruneEmptyLists(); 1186 1187 for (const auto &I : IFMap) 1188 pruneRegCopies(I.first); 1189 } 1190 1191 namespace { 1192 1193 // Class for comparing IF candidates for registers that have multiple of 1194 // them. The smaller the candidate, according to this ordering, the better. 1195 // First, compare the number of zeros in the associated potentially remova- 1196 // ble register sets. "Zero" indicates that the register is very likely to 1197 // become dead after this transformation. 1198 // Second, compare "averages", i.e. use-count per size. The lower wins. 1199 // After that, it does not really matter which one is smaller. Resolve 1200 // the tie in some deterministic way. 1201 struct IFOrdering { 1202 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) 1203 : UseC(UC), BaseOrd(BO) {} 1204 1205 bool operator() (const IFRecordWithRegSet &A, 1206 const IFRecordWithRegSet &B) const; 1207 1208 private: 1209 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1210 unsigned &Sum) const; 1211 1212 const UnsignedMap &UseC; 1213 const RegisterOrdering &BaseOrd; 1214 }; 1215 1216 } // end anonymous namespace 1217 1218 bool IFOrdering::operator() (const IFRecordWithRegSet &A, 1219 const IFRecordWithRegSet &B) const { 1220 unsigned SizeA = 0, ZeroA = 0, SumA = 0; 1221 unsigned SizeB = 0, ZeroB = 0, SumB = 0; 1222 stats(A.second, SizeA, ZeroA, SumA); 1223 stats(B.second, SizeB, ZeroB, SumB); 1224 1225 // We will pick the minimum element. The more zeros, the better. 1226 if (ZeroA != ZeroB) 1227 return ZeroA > ZeroB; 1228 // Compare SumA/SizeA with SumB/SizeB, lower is better. 1229 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; 1230 if (AvgA != AvgB) 1231 return AvgA < AvgB; 1232 1233 // The sets compare identical so far. Resort to comparing the IF records. 1234 // The actual values don't matter, this is only for determinism. 1235 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; 1236 if (OSA != OSB) 1237 return OSA < OSB; 1238 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; 1239 if (OIA != OIB) 1240 return OIA < OIB; 1241 if (A.first.Wdh != B.first.Wdh) 1242 return A.first.Wdh < B.first.Wdh; 1243 return A.first.Off < B.first.Off; 1244 } 1245 1246 void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1247 unsigned &Sum) const { 1248 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { 1249 UnsignedMap::const_iterator F = UseC.find(R); 1250 assert(F != UseC.end()); 1251 unsigned UC = F->second; 1252 if (UC == 0) 1253 Zero++; 1254 Sum += UC; 1255 Size++; 1256 } 1257 } 1258 1259 void HexagonGenInsert::selectCandidates() { 1260 // Some registers may have multiple valid candidates. Pick the best one 1261 // (or decide not to use any). 1262 1263 // Compute the "removability" measure of R: 1264 // For each potentially removable register R, record the number of regis- 1265 // ters with IF candidates, where R appears in at least one set. 1266 RegisterSet AllRMs; 1267 UnsignedMap UseC, RemC; 1268 IFMapType::iterator End = IFMap.end(); 1269 1270 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1271 const IFListType &LL = I->second; 1272 RegisterSet TT; 1273 for (const auto &J : LL) 1274 TT.insert(J.second); 1275 for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) 1276 RemC[R]++; 1277 AllRMs.insert(TT); 1278 } 1279 1280 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { 1281 using use_iterator = MachineRegisterInfo::use_nodbg_iterator; 1282 using InstrSet = SmallSet<const MachineInstr *, 16>; 1283 1284 InstrSet UIs; 1285 // Count as the number of instructions in which R is used, not the 1286 // number of operands. 1287 use_iterator E = MRI->use_nodbg_end(); 1288 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) 1289 UIs.insert(I->getParent()); 1290 unsigned C = UIs.size(); 1291 // Calculate a measure, which is the number of instructions using R, 1292 // minus the "removability" count computed earlier. 1293 unsigned D = RemC[R]; 1294 UseC[R] = (C > D) ? C-D : 0; // doz 1295 } 1296 1297 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; 1298 if (!SelectAll0 && !SelectHas0) 1299 SelectAll0 = true; 1300 1301 // The smaller the number UseC for a given register R, the "less used" 1302 // R is aside from the opportunities for removal offered by generating 1303 // "insert" instructions. 1304 // Iterate over the IF map, and for those registers that have multiple 1305 // candidates, pick the minimum one according to IFOrdering. 1306 IFOrdering IFO(UseC, BaseOrd); 1307 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1308 IFListType &LL = I->second; 1309 if (LL.empty()) 1310 continue; 1311 // Get the minimum element, remember it and clear the list. If the 1312 // element found is adequate, we will put it back on the list, other- 1313 // wise the list will remain empty, and the entry for this register 1314 // will be removed (i.e. this register will not be replaced by insert). 1315 IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); 1316 assert(MinI != LL.end()); 1317 IFRecordWithRegSet M = *MinI; 1318 LL.clear(); 1319 1320 // We want to make sure that this replacement will have a chance to be 1321 // beneficial, and that means that we want to have indication that some 1322 // register will be removed. The most likely registers to be eliminated 1323 // are the use operands in the definition of I->first. Accept/reject a 1324 // candidate based on how many of its uses it can potentially eliminate. 1325 1326 RegisterSet Us; 1327 const MachineInstr *DefI = MRI->getVRegDef(I->first); 1328 getInstrUses(DefI, Us); 1329 bool Accept = false; 1330 1331 if (SelectAll0) { 1332 bool All0 = true; 1333 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1334 if (UseC[R] == 0) 1335 continue; 1336 All0 = false; 1337 break; 1338 } 1339 Accept = All0; 1340 } else if (SelectHas0) { 1341 bool Has0 = false; 1342 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1343 if (UseC[R] != 0) 1344 continue; 1345 Has0 = true; 1346 break; 1347 } 1348 Accept = Has0; 1349 } 1350 if (Accept) 1351 LL.push_back(M); 1352 } 1353 1354 // Remove candidates that add uses of removable registers, unless the 1355 // removable registers are among replacement candidates. 1356 // Recompute the removable registers, since some candidates may have 1357 // been eliminated. 1358 AllRMs.clear(); 1359 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1360 const IFListType &LL = I->second; 1361 if (!LL.empty()) 1362 AllRMs.insert(LL[0].second); 1363 } 1364 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1365 IFListType &LL = I->second; 1366 if (LL.empty()) 1367 continue; 1368 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; 1369 if (AllRMs[SR] || AllRMs[IR]) 1370 LL.clear(); 1371 } 1372 1373 pruneEmptyLists(); 1374 } 1375 1376 bool HexagonGenInsert::generateInserts() { 1377 // Create a new register for each one from IFMap, and store them in the 1378 // map. 1379 UnsignedMap RegMap; 1380 for (auto &I : IFMap) { 1381 unsigned VR = I.first; 1382 const TargetRegisterClass *RC = MRI->getRegClass(VR); 1383 Register NewVR = MRI->createVirtualRegister(RC); 1384 RegMap[VR] = NewVR; 1385 } 1386 1387 // We can generate the "insert" instructions using potentially stale re- 1388 // gisters: SrcR and InsR for a given VR may be among other registers that 1389 // are also replaced. This is fine, we will do the mass "rauw" a bit later. 1390 for (auto &I : IFMap) { 1391 MachineInstr *MI = MRI->getVRegDef(I.first); 1392 MachineBasicBlock &B = *MI->getParent(); 1393 DebugLoc DL = MI->getDebugLoc(); 1394 unsigned NewR = RegMap[I.first]; 1395 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; 1396 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) 1397 : HII->get(Hexagon::S2_insertp); 1398 IFRecord IF = I.second[0].first; 1399 unsigned Wdh = IF.Wdh, Off = IF.Off; 1400 unsigned InsS = 0; 1401 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { 1402 InsS = Hexagon::isub_lo; 1403 if (Off >= 32) { 1404 InsS = Hexagon::isub_hi; 1405 Off -= 32; 1406 } 1407 } 1408 // Advance to the proper location for inserting instructions. This could 1409 // be B.end(). 1410 MachineBasicBlock::iterator At = MI; 1411 if (MI->isPHI()) 1412 At = B.getFirstNonPHI(); 1413 1414 BuildMI(B, At, DL, D, NewR) 1415 .addReg(IF.SrcR) 1416 .addReg(IF.InsR, 0, InsS) 1417 .addImm(Wdh) 1418 .addImm(Off); 1419 1420 MRI->clearKillFlags(IF.SrcR); 1421 MRI->clearKillFlags(IF.InsR); 1422 } 1423 1424 for (const auto &I : IFMap) { 1425 MachineInstr *DefI = MRI->getVRegDef(I.first); 1426 MRI->replaceRegWith(I.first, RegMap[I.first]); 1427 DefI->eraseFromParent(); 1428 } 1429 1430 return true; 1431 } 1432 1433 bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { 1434 bool Changed = false; 1435 1436 for (auto *DTN : children<MachineDomTreeNode*>(N)) 1437 Changed |= removeDeadCode(DTN); 1438 1439 MachineBasicBlock *B = N->getBlock(); 1440 std::vector<MachineInstr*> Instrs; 1441 for (MachineInstr &MI : llvm::reverse(*B)) 1442 Instrs.push_back(&MI); 1443 1444 for (MachineInstr *MI : Instrs) { 1445 unsigned Opc = MI->getOpcode(); 1446 // Do not touch lifetime markers. This is why the target-independent DCE 1447 // cannot be used. 1448 if (Opc == TargetOpcode::LIFETIME_START || 1449 Opc == TargetOpcode::LIFETIME_END) 1450 continue; 1451 bool Store = false; 1452 if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) 1453 continue; 1454 1455 bool AllDead = true; 1456 SmallVector<unsigned,2> Regs; 1457 for (const MachineOperand &MO : MI->operands()) { 1458 if (!MO.isReg() || !MO.isDef()) 1459 continue; 1460 Register R = MO.getReg(); 1461 if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) { 1462 AllDead = false; 1463 break; 1464 } 1465 Regs.push_back(R); 1466 } 1467 if (!AllDead) 1468 continue; 1469 1470 B->erase(MI); 1471 for (unsigned I = 0, N = Regs.size(); I != N; ++I) 1472 MRI->markUsesInDebugValueAsUndef(Regs[I]); 1473 Changed = true; 1474 } 1475 1476 return Changed; 1477 } 1478 1479 bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { 1480 if (skipFunction(MF.getFunction())) 1481 return false; 1482 1483 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; 1484 bool Changed = false; 1485 1486 // Verify: one, but not both. 1487 assert(!OptSelectAll0 || !OptSelectHas0); 1488 1489 IFMap.clear(); 1490 BaseOrd.clear(); 1491 CellOrd.clear(); 1492 1493 const auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1494 HII = ST.getInstrInfo(); 1495 HRI = ST.getRegisterInfo(); 1496 MFN = &MF; 1497 MRI = &MF.getRegInfo(); 1498 MDT = &getAnalysis<MachineDominatorTree>(); 1499 1500 // Clean up before any further processing, so that dead code does not 1501 // get used in a newly generated "insert" instruction. Have a custom 1502 // version of DCE that preserves lifetime markers. Without it, merging 1503 // of stack objects can fail to recognize and merge disjoint objects 1504 // leading to unnecessary stack growth. 1505 Changed = removeDeadCode(MDT->getRootNode()); 1506 1507 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 1508 BitTracker BTLoc(HE, MF); 1509 BTLoc.trace(isDebug()); 1510 BTLoc.run(); 1511 CellMapShadow MS(BTLoc); 1512 CMS = &MS; 1513 1514 buildOrderingMF(BaseOrd); 1515 buildOrderingBT(BaseOrd, CellOrd); 1516 1517 if (isDebug()) { 1518 dbgs() << "Cell ordering:\n"; 1519 for (const auto &I : CellOrd) { 1520 unsigned VR = I.first, Pos = I.second; 1521 dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n"; 1522 } 1523 } 1524 1525 // Collect candidates for conversion into the insert forms. 1526 MachineBasicBlock *RootB = MDT->getRoot(); 1527 OrderedRegisterList AvailR(CellOrd); 1528 1529 const char *const TGName = "hexinsert"; 1530 const char *const TGDesc = "Generate Insert Instructions"; 1531 1532 { 1533 NamedRegionTimer _T("collection", "collection", TGName, TGDesc, 1534 TimingDetail); 1535 collectInBlock(RootB, AvailR); 1536 // Complete the information gathered in IFMap. 1537 computeRemovableRegisters(); 1538 } 1539 1540 if (isDebug()) { 1541 dbgs() << "Candidates after collection:\n"; 1542 dump_map(); 1543 } 1544 1545 if (IFMap.empty()) 1546 return Changed; 1547 1548 { 1549 NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail); 1550 pruneCandidates(); 1551 } 1552 1553 if (isDebug()) { 1554 dbgs() << "Candidates after pruning:\n"; 1555 dump_map(); 1556 } 1557 1558 if (IFMap.empty()) 1559 return Changed; 1560 1561 { 1562 NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail); 1563 selectCandidates(); 1564 } 1565 1566 if (isDebug()) { 1567 dbgs() << "Candidates after selection:\n"; 1568 dump_map(); 1569 } 1570 1571 // Filter out vregs beyond the cutoff. 1572 if (VRegIndexCutoff.getPosition()) { 1573 unsigned Cutoff = VRegIndexCutoff; 1574 1575 using IterListType = SmallVector<IFMapType::iterator, 16>; 1576 1577 IterListType Out; 1578 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1579 unsigned Idx = Register::virtReg2Index(I->first); 1580 if (Idx >= Cutoff) 1581 Out.push_back(I); 1582 } 1583 for (unsigned i = 0, n = Out.size(); i < n; ++i) 1584 IFMap.erase(Out[i]); 1585 } 1586 if (IFMap.empty()) 1587 return Changed; 1588 1589 { 1590 NamedRegionTimer _T("generation", "generation", TGName, TGDesc, 1591 TimingDetail); 1592 generateInserts(); 1593 } 1594 1595 return true; 1596 } 1597 1598 FunctionPass *llvm::createHexagonGenInsert() { 1599 return new HexagonGenInsert(); 1600 } 1601 1602 //===----------------------------------------------------------------------===// 1603 // Public Constructor Functions 1604 //===----------------------------------------------------------------------===// 1605 1606 INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", 1607 "Hexagon generate \"insert\" instructions", false, false) 1608 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1609 INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", 1610 "Hexagon generate \"insert\" instructions", false, false) 1611