1 //===--- HexagonBitSimplify.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #define DEBUG_TYPE "hexbit" 11 12 #include "HexagonBitTracker.h" 13 #include "HexagonTargetMachine.h" 14 #include "llvm/CodeGen/MachineDominators.h" 15 #include "llvm/CodeGen/MachineFunctionPass.h" 16 #include "llvm/CodeGen/MachineInstrBuilder.h" 17 #include "llvm/CodeGen/MachineRegisterInfo.h" 18 #include "llvm/CodeGen/Passes.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/Target/TargetMachine.h" 24 25 using namespace llvm; 26 27 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden, 28 cl::init(true), cl::desc("Preserve subregisters in tied operands")); 29 30 namespace llvm { 31 void initializeHexagonBitSimplifyPass(PassRegistry& Registry); 32 FunctionPass *createHexagonBitSimplify(); 33 } 34 35 namespace { 36 // Set of virtual registers, based on BitVector. 37 struct RegisterSet : private BitVector { 38 RegisterSet() : BitVector() {} 39 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 40 RegisterSet(const RegisterSet &RS) : BitVector(RS) {} 41 42 using BitVector::clear; 43 using BitVector::count; 44 45 unsigned find_first() const { 46 int First = BitVector::find_first(); 47 if (First < 0) 48 return 0; 49 return x2v(First); 50 } 51 52 unsigned find_next(unsigned Prev) const { 53 int Next = BitVector::find_next(v2x(Prev)); 54 if (Next < 0) 55 return 0; 56 return x2v(Next); 57 } 58 59 RegisterSet &insert(unsigned R) { 60 unsigned Idx = v2x(R); 61 ensure(Idx); 62 return static_cast<RegisterSet&>(BitVector::set(Idx)); 63 } 64 RegisterSet &remove(unsigned R) { 65 unsigned Idx = v2x(R); 66 if (Idx >= size()) 67 return *this; 68 return static_cast<RegisterSet&>(BitVector::reset(Idx)); 69 } 70 71 RegisterSet &insert(const RegisterSet &Rs) { 72 return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 73 } 74 RegisterSet &remove(const RegisterSet &Rs) { 75 return static_cast<RegisterSet&>(BitVector::reset(Rs)); 76 } 77 78 reference operator[](unsigned R) { 79 unsigned Idx = v2x(R); 80 ensure(Idx); 81 return BitVector::operator[](Idx); 82 } 83 bool operator[](unsigned R) const { 84 unsigned Idx = v2x(R); 85 assert(Idx < size()); 86 return BitVector::operator[](Idx); 87 } 88 bool has(unsigned R) const { 89 unsigned Idx = v2x(R); 90 if (Idx >= size()) 91 return false; 92 return BitVector::test(Idx); 93 } 94 95 bool empty() const { 96 return !BitVector::any(); 97 } 98 bool includes(const RegisterSet &Rs) const { 99 // A.BitVector::test(B) <=> A-B != {} 100 return !Rs.BitVector::test(*this); 101 } 102 bool intersects(const RegisterSet &Rs) const { 103 return BitVector::anyCommon(Rs); 104 } 105 106 private: 107 void ensure(unsigned Idx) { 108 if (size() <= Idx) 109 resize(std::max(Idx+1, 32U)); 110 } 111 static inline unsigned v2x(unsigned v) { 112 return TargetRegisterInfo::virtReg2Index(v); 113 } 114 static inline unsigned x2v(unsigned x) { 115 return TargetRegisterInfo::index2VirtReg(x); 116 } 117 }; 118 119 120 struct PrintRegSet { 121 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 122 : RS(S), TRI(RI) {} 123 friend raw_ostream &operator<< (raw_ostream &OS, 124 const PrintRegSet &P); 125 private: 126 const RegisterSet &RS; 127 const TargetRegisterInfo *TRI; 128 }; 129 130 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) 131 LLVM_ATTRIBUTE_UNUSED; 132 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 133 OS << '{'; 134 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 135 OS << ' ' << PrintReg(R, P.TRI); 136 OS << " }"; 137 return OS; 138 } 139 } 140 141 142 namespace { 143 class Transformation; 144 145 class HexagonBitSimplify : public MachineFunctionPass { 146 public: 147 static char ID; 148 HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) { 149 initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); 150 } 151 virtual StringRef getPassName() const { 152 return "Hexagon bit simplification"; 153 } 154 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 155 AU.addRequired<MachineDominatorTree>(); 156 AU.addPreserved<MachineDominatorTree>(); 157 MachineFunctionPass::getAnalysisUsage(AU); 158 } 159 virtual bool runOnMachineFunction(MachineFunction &MF); 160 161 static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); 162 static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); 163 static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, 164 const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); 165 static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, 166 uint16_t W); 167 static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, 168 uint16_t W, uint64_t &U); 169 static bool replaceReg(unsigned OldR, unsigned NewR, 170 MachineRegisterInfo &MRI); 171 static bool getSubregMask(const BitTracker::RegisterRef &RR, 172 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); 173 static bool replaceRegWithSub(unsigned OldR, unsigned NewR, 174 unsigned NewSR, MachineRegisterInfo &MRI); 175 static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, 176 unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); 177 static bool parseRegSequence(const MachineInstr &I, 178 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, 179 const MachineRegisterInfo &MRI); 180 181 static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, 182 uint16_t Begin); 183 static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, 184 uint16_t Begin, const HexagonInstrInfo &HII); 185 186 static const TargetRegisterClass *getFinalVRegClass( 187 const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); 188 static bool isTransparentCopy(const BitTracker::RegisterRef &RD, 189 const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); 190 191 private: 192 MachineDominatorTree *MDT; 193 194 bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); 195 static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, 196 unsigned NewSub = Hexagon::NoSubRegister); 197 }; 198 199 char HexagonBitSimplify::ID = 0; 200 typedef HexagonBitSimplify HBS; 201 202 203 // The purpose of this class is to provide a common facility to traverse 204 // the function top-down or bottom-up via the dominator tree, and keep 205 // track of the available registers. 206 class Transformation { 207 public: 208 bool TopDown; 209 Transformation(bool TD) : TopDown(TD) {} 210 virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; 211 virtual ~Transformation() {} 212 }; 213 } 214 215 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", 216 "Hexagon bit simplification", false, false) 217 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 218 INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", 219 "Hexagon bit simplification", false, false) 220 221 222 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, 223 RegisterSet &AVs) { 224 MachineDomTreeNode *N = MDT->getNode(&B); 225 typedef GraphTraits<MachineDomTreeNode*> GTN; 226 bool Changed = false; 227 228 if (T.TopDown) 229 Changed = T.processBlock(B, AVs); 230 231 RegisterSet Defs; 232 for (auto &I : B) 233 getInstrDefs(I, Defs); 234 RegisterSet NewAVs = AVs; 235 NewAVs.insert(Defs); 236 237 for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { 238 MachineBasicBlock *SB = (*I)->getBlock(); 239 Changed |= visitBlock(*SB, T, NewAVs); 240 } 241 if (!T.TopDown) 242 Changed |= T.processBlock(B, AVs); 243 244 return Changed; 245 } 246 247 // 248 // Utility functions: 249 // 250 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, 251 RegisterSet &Defs) { 252 for (auto &Op : MI.operands()) { 253 if (!Op.isReg() || !Op.isDef()) 254 continue; 255 unsigned R = Op.getReg(); 256 if (!TargetRegisterInfo::isVirtualRegister(R)) 257 continue; 258 Defs.insert(R); 259 } 260 } 261 262 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, 263 RegisterSet &Uses) { 264 for (auto &Op : MI.operands()) { 265 if (!Op.isReg() || !Op.isUse()) 266 continue; 267 unsigned R = Op.getReg(); 268 if (!TargetRegisterInfo::isVirtualRegister(R)) 269 continue; 270 Uses.insert(R); 271 } 272 } 273 274 // Check if all the bits in range [B, E) in both cells are equal. 275 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, 276 uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, 277 uint16_t W) { 278 for (uint16_t i = 0; i < W; ++i) { 279 // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. 280 if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) 281 return false; 282 // Same for RC2[i]. 283 if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) 284 return false; 285 if (RC1[B1+i] != RC2[B2+i]) 286 return false; 287 } 288 return true; 289 } 290 291 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, 292 uint16_t B, uint16_t W) { 293 assert(B < RC.width() && B+W <= RC.width()); 294 for (uint16_t i = B; i < B+W; ++i) 295 if (!RC[i].is(0)) 296 return false; 297 return true; 298 } 299 300 301 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, 302 uint16_t B, uint16_t W, uint64_t &U) { 303 assert(B < RC.width() && B+W <= RC.width()); 304 int64_t T = 0; 305 for (uint16_t i = B+W; i > B; --i) { 306 const BitTracker::BitValue &BV = RC[i-1]; 307 T <<= 1; 308 if (BV.is(1)) 309 T |= 1; 310 else if (!BV.is(0)) 311 return false; 312 } 313 U = T; 314 return true; 315 } 316 317 318 bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, 319 MachineRegisterInfo &MRI) { 320 if (!TargetRegisterInfo::isVirtualRegister(OldR) || 321 !TargetRegisterInfo::isVirtualRegister(NewR)) 322 return false; 323 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 324 decltype(End) NextI; 325 for (auto I = Begin; I != End; I = NextI) { 326 NextI = std::next(I); 327 I->setReg(NewR); 328 } 329 return Begin != End; 330 } 331 332 333 bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, 334 unsigned NewSR, MachineRegisterInfo &MRI) { 335 if (!TargetRegisterInfo::isVirtualRegister(OldR) || 336 !TargetRegisterInfo::isVirtualRegister(NewR)) 337 return false; 338 if (hasTiedUse(OldR, MRI, NewSR)) 339 return false; 340 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 341 decltype(End) NextI; 342 for (auto I = Begin; I != End; I = NextI) { 343 NextI = std::next(I); 344 I->setReg(NewR); 345 I->setSubReg(NewSR); 346 } 347 return Begin != End; 348 } 349 350 351 bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, 352 unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { 353 if (!TargetRegisterInfo::isVirtualRegister(OldR) || 354 !TargetRegisterInfo::isVirtualRegister(NewR)) 355 return false; 356 if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR)) 357 return false; 358 auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); 359 decltype(End) NextI; 360 for (auto I = Begin; I != End; I = NextI) { 361 NextI = std::next(I); 362 if (I->getSubReg() != OldSR) 363 continue; 364 I->setReg(NewR); 365 I->setSubReg(NewSR); 366 } 367 return Begin != End; 368 } 369 370 371 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB 372 // of Sub in Reg, and set Width to the size of Sub in bits. Return true, 373 // if this succeeded, otherwise return false. 374 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, 375 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { 376 const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); 377 if (RR.Sub == 0) { 378 Begin = 0; 379 Width = RC->getSize()*8; 380 return true; 381 } 382 383 Begin = 0; 384 385 switch (RC->getID()) { 386 case Hexagon::DoubleRegsRegClassID: 387 case Hexagon::VecDblRegsRegClassID: 388 case Hexagon::VecDblRegs128BRegClassID: 389 Width = RC->getSize()*8 / 2; 390 if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) 391 Begin = Width; 392 break; 393 default: 394 return false; 395 } 396 return true; 397 } 398 399 400 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high 401 // subregister. 402 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, 403 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, 404 const MachineRegisterInfo &MRI) { 405 assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); 406 unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); 407 auto *DstRC = MRI.getRegClass(I.getOperand(0).getReg()); 408 auto &HRI = static_cast<const HexagonRegisterInfo&>( 409 *MRI.getTargetRegisterInfo()); 410 unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo); 411 unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi); 412 assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); 413 if (Sub1 == SubLo && Sub2 == SubHi) { 414 SL = I.getOperand(1); 415 SH = I.getOperand(3); 416 return true; 417 } 418 if (Sub1 == SubHi && Sub2 == SubLo) { 419 SH = I.getOperand(1); 420 SL = I.getOperand(3); 421 return true; 422 } 423 return false; 424 } 425 426 427 // All stores (except 64-bit stores) take a 32-bit register as the source 428 // of the value to be stored. If the instruction stores into a location 429 // that is shorter than 32 bits, some bits of the source register are not 430 // used. For each store instruction, calculate the set of used bits in 431 // the source register, and set appropriate bits in Bits. Return true if 432 // the bits are calculated, false otherwise. 433 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, 434 uint16_t Begin) { 435 using namespace Hexagon; 436 437 switch (Opc) { 438 // Store byte 439 case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 440 case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new 441 case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 442 case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 443 case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 444 case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 445 case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new 446 case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new 447 case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new 448 case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new 449 case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 450 case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new 451 case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 452 case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 453 case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 454 case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 455 case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new 456 case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new 457 case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new 458 case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new 459 case S4_storerb_ap: // memb(Re32=#U6)=Rt32 460 case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new 461 case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 462 case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new 463 case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 464 case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new 465 case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 466 case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new 467 case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 468 case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new 469 case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 470 case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new 471 case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 472 case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new 473 case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 474 case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 475 case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 476 case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 477 case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new 478 case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new 479 case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new 480 case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new 481 case S2_storerbgp: // memb(gp+#u16:0)=Rt32 482 case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new 483 case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 484 case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 485 case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 486 case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 487 case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new 488 case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new 489 case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new 490 case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new 491 Bits.set(Begin, Begin+8); 492 return true; 493 494 // Store low half 495 case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 496 case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new 497 case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 498 case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 499 case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 500 case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 501 case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new 502 case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new 503 case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new 504 case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new 505 case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 506 case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new 507 case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 508 case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 509 case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 510 case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 511 case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new 512 case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new 513 case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new 514 case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new 515 case S4_storerh_ap: // memh(Re32=#U6)=Rt32 516 case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new 517 case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 518 case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new 519 case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 520 case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new 521 case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 522 case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new 523 case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 524 case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new 525 case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 526 case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new 527 case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 528 case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 529 case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 530 case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 531 case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 532 case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new 533 case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new 534 case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new 535 case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new 536 case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new 537 case S2_storerhgp: // memh(gp+#u16:1)=Rt32 538 case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new 539 case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 540 case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 541 case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 542 case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 543 case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new 544 case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new 545 case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new 546 case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new 547 Bits.set(Begin, Begin+16); 548 return true; 549 550 // Store high half 551 case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 552 case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 553 case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 554 case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 555 case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 556 case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 557 case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 558 case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 559 case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 560 case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 561 case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 562 case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 563 case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 564 case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 565 case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 566 case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 567 case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 568 case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 569 case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 570 case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 571 case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 572 case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 573 case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 574 case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 575 case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 576 case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 577 Bits.set(Begin+16, Begin+32); 578 return true; 579 } 580 581 return false; 582 } 583 584 585 // For an instruction with opcode Opc, calculate the set of bits that it 586 // uses in a register in operand OpN. This only calculates the set of used 587 // bits for cases where it does not depend on any operands (as is the case 588 // in shifts, for example). For concrete instructions from a program, the 589 // operand may be a subregister of a larger register, while Bits would 590 // correspond to the larger register in its entirety. Because of that, 591 // the parameter Begin can be used to indicate which bit of Bits should be 592 // considered the LSB of of the operand. 593 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, 594 BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { 595 using namespace Hexagon; 596 597 const MCInstrDesc &D = HII.get(Opc); 598 if (D.mayStore()) { 599 if (OpN == D.getNumOperands()-1) 600 return getUsedBitsInStore(Opc, Bits, Begin); 601 return false; 602 } 603 604 switch (Opc) { 605 // One register source. Used bits: R1[0-7]. 606 case A2_sxtb: 607 case A2_zxtb: 608 case A4_cmpbeqi: 609 case A4_cmpbgti: 610 case A4_cmpbgtui: 611 if (OpN == 1) { 612 Bits.set(Begin, Begin+8); 613 return true; 614 } 615 break; 616 617 // One register source. Used bits: R1[0-15]. 618 case A2_aslh: 619 case A2_sxth: 620 case A2_zxth: 621 case A4_cmpheqi: 622 case A4_cmphgti: 623 case A4_cmphgtui: 624 if (OpN == 1) { 625 Bits.set(Begin, Begin+16); 626 return true; 627 } 628 break; 629 630 // One register source. Used bits: R1[16-31]. 631 case A2_asrh: 632 if (OpN == 1) { 633 Bits.set(Begin+16, Begin+32); 634 return true; 635 } 636 break; 637 638 // Two register sources. Used bits: R1[0-7], R2[0-7]. 639 case A4_cmpbeq: 640 case A4_cmpbgt: 641 case A4_cmpbgtu: 642 if (OpN == 1) { 643 Bits.set(Begin, Begin+8); 644 return true; 645 } 646 break; 647 648 // Two register sources. Used bits: R1[0-15], R2[0-15]. 649 case A4_cmpheq: 650 case A4_cmphgt: 651 case A4_cmphgtu: 652 case A2_addh_h16_ll: 653 case A2_addh_h16_sat_ll: 654 case A2_addh_l16_ll: 655 case A2_addh_l16_sat_ll: 656 case A2_combine_ll: 657 case A2_subh_h16_ll: 658 case A2_subh_h16_sat_ll: 659 case A2_subh_l16_ll: 660 case A2_subh_l16_sat_ll: 661 case M2_mpy_acc_ll_s0: 662 case M2_mpy_acc_ll_s1: 663 case M2_mpy_acc_sat_ll_s0: 664 case M2_mpy_acc_sat_ll_s1: 665 case M2_mpy_ll_s0: 666 case M2_mpy_ll_s1: 667 case M2_mpy_nac_ll_s0: 668 case M2_mpy_nac_ll_s1: 669 case M2_mpy_nac_sat_ll_s0: 670 case M2_mpy_nac_sat_ll_s1: 671 case M2_mpy_rnd_ll_s0: 672 case M2_mpy_rnd_ll_s1: 673 case M2_mpy_sat_ll_s0: 674 case M2_mpy_sat_ll_s1: 675 case M2_mpy_sat_rnd_ll_s0: 676 case M2_mpy_sat_rnd_ll_s1: 677 case M2_mpyd_acc_ll_s0: 678 case M2_mpyd_acc_ll_s1: 679 case M2_mpyd_ll_s0: 680 case M2_mpyd_ll_s1: 681 case M2_mpyd_nac_ll_s0: 682 case M2_mpyd_nac_ll_s1: 683 case M2_mpyd_rnd_ll_s0: 684 case M2_mpyd_rnd_ll_s1: 685 case M2_mpyu_acc_ll_s0: 686 case M2_mpyu_acc_ll_s1: 687 case M2_mpyu_ll_s0: 688 case M2_mpyu_ll_s1: 689 case M2_mpyu_nac_ll_s0: 690 case M2_mpyu_nac_ll_s1: 691 case M2_mpyud_acc_ll_s0: 692 case M2_mpyud_acc_ll_s1: 693 case M2_mpyud_ll_s0: 694 case M2_mpyud_ll_s1: 695 case M2_mpyud_nac_ll_s0: 696 case M2_mpyud_nac_ll_s1: 697 if (OpN == 1 || OpN == 2) { 698 Bits.set(Begin, Begin+16); 699 return true; 700 } 701 break; 702 703 // Two register sources. Used bits: R1[0-15], R2[16-31]. 704 case A2_addh_h16_lh: 705 case A2_addh_h16_sat_lh: 706 case A2_combine_lh: 707 case A2_subh_h16_lh: 708 case A2_subh_h16_sat_lh: 709 case M2_mpy_acc_lh_s0: 710 case M2_mpy_acc_lh_s1: 711 case M2_mpy_acc_sat_lh_s0: 712 case M2_mpy_acc_sat_lh_s1: 713 case M2_mpy_lh_s0: 714 case M2_mpy_lh_s1: 715 case M2_mpy_nac_lh_s0: 716 case M2_mpy_nac_lh_s1: 717 case M2_mpy_nac_sat_lh_s0: 718 case M2_mpy_nac_sat_lh_s1: 719 case M2_mpy_rnd_lh_s0: 720 case M2_mpy_rnd_lh_s1: 721 case M2_mpy_sat_lh_s0: 722 case M2_mpy_sat_lh_s1: 723 case M2_mpy_sat_rnd_lh_s0: 724 case M2_mpy_sat_rnd_lh_s1: 725 case M2_mpyd_acc_lh_s0: 726 case M2_mpyd_acc_lh_s1: 727 case M2_mpyd_lh_s0: 728 case M2_mpyd_lh_s1: 729 case M2_mpyd_nac_lh_s0: 730 case M2_mpyd_nac_lh_s1: 731 case M2_mpyd_rnd_lh_s0: 732 case M2_mpyd_rnd_lh_s1: 733 case M2_mpyu_acc_lh_s0: 734 case M2_mpyu_acc_lh_s1: 735 case M2_mpyu_lh_s0: 736 case M2_mpyu_lh_s1: 737 case M2_mpyu_nac_lh_s0: 738 case M2_mpyu_nac_lh_s1: 739 case M2_mpyud_acc_lh_s0: 740 case M2_mpyud_acc_lh_s1: 741 case M2_mpyud_lh_s0: 742 case M2_mpyud_lh_s1: 743 case M2_mpyud_nac_lh_s0: 744 case M2_mpyud_nac_lh_s1: 745 // These four are actually LH. 746 case A2_addh_l16_hl: 747 case A2_addh_l16_sat_hl: 748 case A2_subh_l16_hl: 749 case A2_subh_l16_sat_hl: 750 if (OpN == 1) { 751 Bits.set(Begin, Begin+16); 752 return true; 753 } 754 if (OpN == 2) { 755 Bits.set(Begin+16, Begin+32); 756 return true; 757 } 758 break; 759 760 // Two register sources, used bits: R1[16-31], R2[0-15]. 761 case A2_addh_h16_hl: 762 case A2_addh_h16_sat_hl: 763 case A2_combine_hl: 764 case A2_subh_h16_hl: 765 case A2_subh_h16_sat_hl: 766 case M2_mpy_acc_hl_s0: 767 case M2_mpy_acc_hl_s1: 768 case M2_mpy_acc_sat_hl_s0: 769 case M2_mpy_acc_sat_hl_s1: 770 case M2_mpy_hl_s0: 771 case M2_mpy_hl_s1: 772 case M2_mpy_nac_hl_s0: 773 case M2_mpy_nac_hl_s1: 774 case M2_mpy_nac_sat_hl_s0: 775 case M2_mpy_nac_sat_hl_s1: 776 case M2_mpy_rnd_hl_s0: 777 case M2_mpy_rnd_hl_s1: 778 case M2_mpy_sat_hl_s0: 779 case M2_mpy_sat_hl_s1: 780 case M2_mpy_sat_rnd_hl_s0: 781 case M2_mpy_sat_rnd_hl_s1: 782 case M2_mpyd_acc_hl_s0: 783 case M2_mpyd_acc_hl_s1: 784 case M2_mpyd_hl_s0: 785 case M2_mpyd_hl_s1: 786 case M2_mpyd_nac_hl_s0: 787 case M2_mpyd_nac_hl_s1: 788 case M2_mpyd_rnd_hl_s0: 789 case M2_mpyd_rnd_hl_s1: 790 case M2_mpyu_acc_hl_s0: 791 case M2_mpyu_acc_hl_s1: 792 case M2_mpyu_hl_s0: 793 case M2_mpyu_hl_s1: 794 case M2_mpyu_nac_hl_s0: 795 case M2_mpyu_nac_hl_s1: 796 case M2_mpyud_acc_hl_s0: 797 case M2_mpyud_acc_hl_s1: 798 case M2_mpyud_hl_s0: 799 case M2_mpyud_hl_s1: 800 case M2_mpyud_nac_hl_s0: 801 case M2_mpyud_nac_hl_s1: 802 if (OpN == 1) { 803 Bits.set(Begin+16, Begin+32); 804 return true; 805 } 806 if (OpN == 2) { 807 Bits.set(Begin, Begin+16); 808 return true; 809 } 810 break; 811 812 // Two register sources, used bits: R1[16-31], R2[16-31]. 813 case A2_addh_h16_hh: 814 case A2_addh_h16_sat_hh: 815 case A2_combine_hh: 816 case A2_subh_h16_hh: 817 case A2_subh_h16_sat_hh: 818 case M2_mpy_acc_hh_s0: 819 case M2_mpy_acc_hh_s1: 820 case M2_mpy_acc_sat_hh_s0: 821 case M2_mpy_acc_sat_hh_s1: 822 case M2_mpy_hh_s0: 823 case M2_mpy_hh_s1: 824 case M2_mpy_nac_hh_s0: 825 case M2_mpy_nac_hh_s1: 826 case M2_mpy_nac_sat_hh_s0: 827 case M2_mpy_nac_sat_hh_s1: 828 case M2_mpy_rnd_hh_s0: 829 case M2_mpy_rnd_hh_s1: 830 case M2_mpy_sat_hh_s0: 831 case M2_mpy_sat_hh_s1: 832 case M2_mpy_sat_rnd_hh_s0: 833 case M2_mpy_sat_rnd_hh_s1: 834 case M2_mpyd_acc_hh_s0: 835 case M2_mpyd_acc_hh_s1: 836 case M2_mpyd_hh_s0: 837 case M2_mpyd_hh_s1: 838 case M2_mpyd_nac_hh_s0: 839 case M2_mpyd_nac_hh_s1: 840 case M2_mpyd_rnd_hh_s0: 841 case M2_mpyd_rnd_hh_s1: 842 case M2_mpyu_acc_hh_s0: 843 case M2_mpyu_acc_hh_s1: 844 case M2_mpyu_hh_s0: 845 case M2_mpyu_hh_s1: 846 case M2_mpyu_nac_hh_s0: 847 case M2_mpyu_nac_hh_s1: 848 case M2_mpyud_acc_hh_s0: 849 case M2_mpyud_acc_hh_s1: 850 case M2_mpyud_hh_s0: 851 case M2_mpyud_hh_s1: 852 case M2_mpyud_nac_hh_s0: 853 case M2_mpyud_nac_hh_s1: 854 if (OpN == 1 || OpN == 2) { 855 Bits.set(Begin+16, Begin+32); 856 return true; 857 } 858 break; 859 } 860 861 return false; 862 } 863 864 865 // Calculate the register class that matches Reg:Sub. For example, if 866 // vreg1 is a double register, then vreg1:isub_hi would match the "int" 867 // register class. 868 const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( 869 const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { 870 if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) 871 return nullptr; 872 auto *RC = MRI.getRegClass(RR.Reg); 873 if (RR.Sub == 0) 874 return RC; 875 auto &HRI = static_cast<const HexagonRegisterInfo&>( 876 *MRI.getTargetRegisterInfo()); 877 878 auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { 879 assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) || 880 Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi)); 881 }; 882 883 switch (RC->getID()) { 884 case Hexagon::DoubleRegsRegClassID: 885 VerifySR(RC, RR.Sub); 886 return &Hexagon::IntRegsRegClass; 887 case Hexagon::VecDblRegsRegClassID: 888 VerifySR(RC, RR.Sub); 889 return &Hexagon::VectorRegsRegClass; 890 case Hexagon::VecDblRegs128BRegClassID: 891 VerifySR(RC, RR.Sub); 892 return &Hexagon::VectorRegs128BRegClass; 893 } 894 return nullptr; 895 } 896 897 898 // Check if RD could be replaced with RS at any possible use of RD. 899 // For example a predicate register cannot be replaced with a integer 900 // register, but a 64-bit register with a subregister can be replaced 901 // with a 32-bit register. 902 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, 903 const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { 904 if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || 905 !TargetRegisterInfo::isVirtualRegister(RS.Reg)) 906 return false; 907 // Return false if one (or both) classes are nullptr. 908 auto *DRC = getFinalVRegClass(RD, MRI); 909 if (!DRC) 910 return false; 911 912 return DRC == getFinalVRegClass(RS, MRI); 913 } 914 915 916 bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, 917 unsigned NewSub) { 918 if (!PreserveTiedOps) 919 return false; 920 return any_of(MRI.use_operands(Reg), 921 [NewSub] (const MachineOperand &Op) -> bool { 922 return Op.getSubReg() != NewSub && Op.isTied(); 923 }); 924 } 925 926 // 927 // Dead code elimination 928 // 929 namespace { 930 class DeadCodeElimination { 931 public: 932 DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) 933 : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), 934 MDT(mdt), MRI(mf.getRegInfo()) {} 935 936 bool run() { 937 return runOnNode(MDT.getRootNode()); 938 } 939 940 private: 941 bool isDead(unsigned R) const; 942 bool runOnNode(MachineDomTreeNode *N); 943 944 MachineFunction &MF; 945 const HexagonInstrInfo &HII; 946 MachineDominatorTree &MDT; 947 MachineRegisterInfo &MRI; 948 }; 949 } 950 951 952 bool DeadCodeElimination::isDead(unsigned R) const { 953 for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { 954 MachineInstr *UseI = I->getParent(); 955 if (UseI->isDebugValue()) 956 continue; 957 if (UseI->isPHI()) { 958 assert(!UseI->getOperand(0).getSubReg()); 959 unsigned DR = UseI->getOperand(0).getReg(); 960 if (DR == R) 961 continue; 962 } 963 return false; 964 } 965 return true; 966 } 967 968 969 bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { 970 bool Changed = false; 971 typedef GraphTraits<MachineDomTreeNode*> GTN; 972 for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) 973 Changed |= runOnNode(*I); 974 975 MachineBasicBlock *B = N->getBlock(); 976 std::vector<MachineInstr*> Instrs; 977 for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) 978 Instrs.push_back(&*I); 979 980 for (auto MI : Instrs) { 981 unsigned Opc = MI->getOpcode(); 982 // Do not touch lifetime markers. This is why the target-independent DCE 983 // cannot be used. 984 if (Opc == TargetOpcode::LIFETIME_START || 985 Opc == TargetOpcode::LIFETIME_END) 986 continue; 987 bool Store = false; 988 if (MI->isInlineAsm()) 989 continue; 990 // Delete PHIs if possible. 991 if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store)) 992 continue; 993 994 bool AllDead = true; 995 SmallVector<unsigned,2> Regs; 996 for (auto &Op : MI->operands()) { 997 if (!Op.isReg() || !Op.isDef()) 998 continue; 999 unsigned R = Op.getReg(); 1000 if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { 1001 AllDead = false; 1002 break; 1003 } 1004 Regs.push_back(R); 1005 } 1006 if (!AllDead) 1007 continue; 1008 1009 B->erase(MI); 1010 for (unsigned i = 0, n = Regs.size(); i != n; ++i) 1011 MRI.markUsesInDebugValueAsUndef(Regs[i]); 1012 Changed = true; 1013 } 1014 1015 return Changed; 1016 } 1017 1018 1019 // 1020 // Eliminate redundant instructions 1021 // 1022 // This transformation will identify instructions where the output register 1023 // is the same as one of its input registers. This only works on instructions 1024 // that define a single register (unlike post-increment loads, for example). 1025 // The equality check is actually more detailed: the code calculates which 1026 // bits of the output are used, and only compares these bits with the input 1027 // registers. 1028 // If the output matches an input, the instruction is replaced with COPY. 1029 // The copies will be removed by another transformation. 1030 namespace { 1031 class RedundantInstrElimination : public Transformation { 1032 public: 1033 RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, 1034 MachineRegisterInfo &mri) 1035 : Transformation(true), HII(hii), MRI(mri), BT(bt) {} 1036 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1037 private: 1038 bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, 1039 unsigned &LostB, unsigned &LostE); 1040 bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, 1041 unsigned &LostB, unsigned &LostE); 1042 bool computeUsedBits(unsigned Reg, BitVector &Bits); 1043 bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, 1044 uint16_t Begin); 1045 bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); 1046 1047 const HexagonInstrInfo &HII; 1048 MachineRegisterInfo &MRI; 1049 BitTracker &BT; 1050 }; 1051 } 1052 1053 1054 // Check if the instruction is a lossy shift left, where the input being 1055 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range 1056 // of bit indices that are lost. 1057 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, 1058 unsigned OpN, unsigned &LostB, unsigned &LostE) { 1059 using namespace Hexagon; 1060 unsigned Opc = MI.getOpcode(); 1061 unsigned ImN, RegN, Width; 1062 switch (Opc) { 1063 case S2_asl_i_p: 1064 ImN = 2; 1065 RegN = 1; 1066 Width = 64; 1067 break; 1068 case S2_asl_i_p_acc: 1069 case S2_asl_i_p_and: 1070 case S2_asl_i_p_nac: 1071 case S2_asl_i_p_or: 1072 case S2_asl_i_p_xacc: 1073 ImN = 3; 1074 RegN = 2; 1075 Width = 64; 1076 break; 1077 case S2_asl_i_r: 1078 ImN = 2; 1079 RegN = 1; 1080 Width = 32; 1081 break; 1082 case S2_addasl_rrri: 1083 case S4_andi_asl_ri: 1084 case S4_ori_asl_ri: 1085 case S4_addi_asl_ri: 1086 case S4_subi_asl_ri: 1087 case S2_asl_i_r_acc: 1088 case S2_asl_i_r_and: 1089 case S2_asl_i_r_nac: 1090 case S2_asl_i_r_or: 1091 case S2_asl_i_r_sat: 1092 case S2_asl_i_r_xacc: 1093 ImN = 3; 1094 RegN = 2; 1095 Width = 32; 1096 break; 1097 default: 1098 return false; 1099 } 1100 1101 if (RegN != OpN) 1102 return false; 1103 1104 assert(MI.getOperand(ImN).isImm()); 1105 unsigned S = MI.getOperand(ImN).getImm(); 1106 if (S == 0) 1107 return false; 1108 LostB = Width-S; 1109 LostE = Width; 1110 return true; 1111 } 1112 1113 1114 // Check if the instruction is a lossy shift right, where the input being 1115 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range 1116 // of bit indices that are lost. 1117 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, 1118 unsigned OpN, unsigned &LostB, unsigned &LostE) { 1119 using namespace Hexagon; 1120 unsigned Opc = MI.getOpcode(); 1121 unsigned ImN, RegN; 1122 switch (Opc) { 1123 case S2_asr_i_p: 1124 case S2_lsr_i_p: 1125 ImN = 2; 1126 RegN = 1; 1127 break; 1128 case S2_asr_i_p_acc: 1129 case S2_asr_i_p_and: 1130 case S2_asr_i_p_nac: 1131 case S2_asr_i_p_or: 1132 case S2_lsr_i_p_acc: 1133 case S2_lsr_i_p_and: 1134 case S2_lsr_i_p_nac: 1135 case S2_lsr_i_p_or: 1136 case S2_lsr_i_p_xacc: 1137 ImN = 3; 1138 RegN = 2; 1139 break; 1140 case S2_asr_i_r: 1141 case S2_lsr_i_r: 1142 ImN = 2; 1143 RegN = 1; 1144 break; 1145 case S4_andi_lsr_ri: 1146 case S4_ori_lsr_ri: 1147 case S4_addi_lsr_ri: 1148 case S4_subi_lsr_ri: 1149 case S2_asr_i_r_acc: 1150 case S2_asr_i_r_and: 1151 case S2_asr_i_r_nac: 1152 case S2_asr_i_r_or: 1153 case S2_lsr_i_r_acc: 1154 case S2_lsr_i_r_and: 1155 case S2_lsr_i_r_nac: 1156 case S2_lsr_i_r_or: 1157 case S2_lsr_i_r_xacc: 1158 ImN = 3; 1159 RegN = 2; 1160 break; 1161 1162 default: 1163 return false; 1164 } 1165 1166 if (RegN != OpN) 1167 return false; 1168 1169 assert(MI.getOperand(ImN).isImm()); 1170 unsigned S = MI.getOperand(ImN).getImm(); 1171 LostB = 0; 1172 LostE = S; 1173 return true; 1174 } 1175 1176 1177 // Calculate the bit vector that corresponds to the used bits of register Reg. 1178 // The vector Bits has the same size, as the size of Reg in bits. If the cal- 1179 // culation fails (i.e. the used bits are unknown), it returns false. Other- 1180 // wise, it returns true and sets the corresponding bits in Bits. 1181 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { 1182 BitVector Used(Bits.size()); 1183 RegisterSet Visited; 1184 std::vector<unsigned> Pending; 1185 Pending.push_back(Reg); 1186 1187 for (unsigned i = 0; i < Pending.size(); ++i) { 1188 unsigned R = Pending[i]; 1189 if (Visited.has(R)) 1190 continue; 1191 Visited.insert(R); 1192 for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { 1193 BitTracker::RegisterRef UR = *I; 1194 unsigned B, W; 1195 if (!HBS::getSubregMask(UR, B, W, MRI)) 1196 return false; 1197 MachineInstr &UseI = *I->getParent(); 1198 if (UseI.isPHI() || UseI.isCopy()) { 1199 unsigned DefR = UseI.getOperand(0).getReg(); 1200 if (!TargetRegisterInfo::isVirtualRegister(DefR)) 1201 return false; 1202 Pending.push_back(DefR); 1203 } else { 1204 if (!computeUsedBits(UseI, I.getOperandNo(), Used, B)) 1205 return false; 1206 } 1207 } 1208 } 1209 Bits |= Used; 1210 return true; 1211 } 1212 1213 1214 // Calculate the bits used by instruction MI in a register in operand OpN. 1215 // Return true/false if the calculation succeeds/fails. If is succeeds, set 1216 // used bits in Bits. This function does not reset any bits in Bits, so 1217 // subsequent calls over different instructions will result in the union 1218 // of the used bits in all these instructions. 1219 // The register in question may be used with a sub-register, whereas Bits 1220 // holds the bits for the entire register. To keep track of that, the 1221 // argument Begin indicates where in Bits is the lowest-significant bit 1222 // of the register used in operand OpN. For example, in instruction: 1223 // vreg1 = S2_lsr_i_r vreg2:isub_hi, 10 1224 // the operand 1 is a 32-bit register, which happens to be a subregister 1225 // of the 64-bit register vreg2, and that subregister starts at position 32. 1226 // In this case Begin=32, since Bits[32] would be the lowest-significant bit 1227 // of vreg2:isub_hi. 1228 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, 1229 unsigned OpN, BitVector &Bits, uint16_t Begin) { 1230 unsigned Opc = MI.getOpcode(); 1231 BitVector T(Bits.size()); 1232 bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII); 1233 // Even if we don't have bits yet, we could still provide some information 1234 // if the instruction is a lossy shift: the lost bits will be marked as 1235 // not used. 1236 unsigned LB, LE; 1237 if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) { 1238 assert(MI.getOperand(OpN).isReg()); 1239 BitTracker::RegisterRef RR = MI.getOperand(OpN); 1240 const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); 1241 uint16_t Width = RC->getSize()*8; 1242 1243 if (!GotBits) 1244 T.set(Begin, Begin+Width); 1245 assert(LB <= LE && LB < Width && LE <= Width); 1246 T.reset(Begin+LB, Begin+LE); 1247 GotBits = true; 1248 } 1249 if (GotBits) 1250 Bits |= T; 1251 return GotBits; 1252 } 1253 1254 1255 // Calculates the used bits in RD ("defined register"), and checks if these 1256 // bits in RS ("used register") and RD are identical. 1257 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, 1258 BitTracker::RegisterRef RS) { 1259 const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); 1260 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 1261 1262 unsigned DB, DW; 1263 if (!HBS::getSubregMask(RD, DB, DW, MRI)) 1264 return false; 1265 unsigned SB, SW; 1266 if (!HBS::getSubregMask(RS, SB, SW, MRI)) 1267 return false; 1268 if (SW != DW) 1269 return false; 1270 1271 BitVector Used(DC.width()); 1272 if (!computeUsedBits(RD.Reg, Used)) 1273 return false; 1274 1275 for (unsigned i = 0; i != DW; ++i) 1276 if (Used[i+DB] && DC[DB+i] != SC[SB+i]) 1277 return false; 1278 return true; 1279 } 1280 1281 1282 bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, 1283 const RegisterSet&) { 1284 if (!BT.reached(&B)) 1285 return false; 1286 bool Changed = false; 1287 1288 for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { 1289 NextI = std::next(I); 1290 MachineInstr *MI = &*I; 1291 1292 if (MI->getOpcode() == TargetOpcode::COPY) 1293 continue; 1294 if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) 1295 continue; 1296 unsigned NumD = MI->getDesc().getNumDefs(); 1297 if (NumD != 1) 1298 continue; 1299 1300 BitTracker::RegisterRef RD = MI->getOperand(0); 1301 if (!BT.has(RD.Reg)) 1302 continue; 1303 const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); 1304 auto At = MI->isPHI() ? B.getFirstNonPHI() 1305 : MachineBasicBlock::iterator(MI); 1306 1307 // Find a source operand that is equal to the result. 1308 for (auto &Op : MI->uses()) { 1309 if (!Op.isReg()) 1310 continue; 1311 BitTracker::RegisterRef RS = Op; 1312 if (!BT.has(RS.Reg)) 1313 continue; 1314 if (!HBS::isTransparentCopy(RD, RS, MRI)) 1315 continue; 1316 1317 unsigned BN, BW; 1318 if (!HBS::getSubregMask(RS, BN, BW, MRI)) 1319 continue; 1320 1321 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 1322 if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) 1323 continue; 1324 1325 // If found, replace the instruction with a COPY. 1326 const DebugLoc &DL = MI->getDebugLoc(); 1327 const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); 1328 unsigned NewR = MRI.createVirtualRegister(FRC); 1329 MachineInstr *CopyI = 1330 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) 1331 .addReg(RS.Reg, 0, RS.Sub); 1332 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 1333 // This pass can create copies between registers that don't have the 1334 // exact same values. Updating the tracker has to involve updating 1335 // all dependent cells. Example: 1336 // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal 1337 // 1338 // vreg3 = copy vreg2 ; <- inserted 1339 // ... = vreg3 ; <- replaced from vreg2 1340 // Indirectly, we can create a "copy" between vreg1 and vreg2 even 1341 // though their exact values do not match. 1342 BT.visit(*CopyI); 1343 Changed = true; 1344 break; 1345 } 1346 } 1347 1348 return Changed; 1349 } 1350 1351 1352 // 1353 // Const generation 1354 // 1355 // Recognize instructions that produce constant values known at compile-time. 1356 // Replace them with register definitions that load these constants directly. 1357 namespace { 1358 class ConstGeneration : public Transformation { 1359 public: 1360 ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, 1361 MachineRegisterInfo &mri) 1362 : Transformation(true), HII(hii), MRI(mri), BT(bt) {} 1363 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1364 static bool isTfrConst(const MachineInstr &MI); 1365 private: 1366 unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, 1367 MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); 1368 1369 const HexagonInstrInfo &HII; 1370 MachineRegisterInfo &MRI; 1371 BitTracker &BT; 1372 }; 1373 } 1374 1375 bool ConstGeneration::isTfrConst(const MachineInstr &MI) { 1376 unsigned Opc = MI.getOpcode(); 1377 switch (Opc) { 1378 case Hexagon::A2_combineii: 1379 case Hexagon::A4_combineii: 1380 case Hexagon::A2_tfrsi: 1381 case Hexagon::A2_tfrpi: 1382 case Hexagon::PS_true: 1383 case Hexagon::PS_false: 1384 case Hexagon::CONST32: 1385 case Hexagon::CONST64: 1386 return true; 1387 } 1388 return false; 1389 } 1390 1391 1392 // Generate a transfer-immediate instruction that is appropriate for the 1393 // register class and the actual value being transferred. 1394 unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, 1395 MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { 1396 unsigned Reg = MRI.createVirtualRegister(RC); 1397 if (RC == &Hexagon::IntRegsRegClass) { 1398 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) 1399 .addImm(int32_t(C)); 1400 return Reg; 1401 } 1402 1403 if (RC == &Hexagon::DoubleRegsRegClass) { 1404 if (isInt<8>(C)) { 1405 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) 1406 .addImm(C); 1407 return Reg; 1408 } 1409 1410 unsigned Lo = Lo_32(C), Hi = Hi_32(C); 1411 if (isInt<8>(Lo) || isInt<8>(Hi)) { 1412 unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii 1413 : Hexagon::A4_combineii; 1414 BuildMI(B, At, DL, HII.get(Opc), Reg) 1415 .addImm(int32_t(Hi)) 1416 .addImm(int32_t(Lo)); 1417 return Reg; 1418 } 1419 1420 BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) 1421 .addImm(C); 1422 return Reg; 1423 } 1424 1425 if (RC == &Hexagon::PredRegsRegClass) { 1426 unsigned Opc; 1427 if (C == 0) 1428 Opc = Hexagon::PS_false; 1429 else if ((C & 0xFF) == 0xFF) 1430 Opc = Hexagon::PS_true; 1431 else 1432 return 0; 1433 BuildMI(B, At, DL, HII.get(Opc), Reg); 1434 return Reg; 1435 } 1436 1437 return 0; 1438 } 1439 1440 1441 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { 1442 if (!BT.reached(&B)) 1443 return false; 1444 bool Changed = false; 1445 RegisterSet Defs; 1446 1447 for (auto I = B.begin(), E = B.end(); I != E; ++I) { 1448 if (isTfrConst(*I)) 1449 continue; 1450 Defs.clear(); 1451 HBS::getInstrDefs(*I, Defs); 1452 if (Defs.count() != 1) 1453 continue; 1454 unsigned DR = Defs.find_first(); 1455 if (!TargetRegisterInfo::isVirtualRegister(DR)) 1456 continue; 1457 uint64_t U; 1458 const BitTracker::RegisterCell &DRC = BT.lookup(DR); 1459 if (HBS::getConst(DRC, 0, DRC.width(), U)) { 1460 int64_t C = U; 1461 DebugLoc DL = I->getDebugLoc(); 1462 auto At = I->isPHI() ? B.getFirstNonPHI() : I; 1463 unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); 1464 if (ImmReg) { 1465 HBS::replaceReg(DR, ImmReg, MRI); 1466 BT.put(ImmReg, DRC); 1467 Changed = true; 1468 } 1469 } 1470 } 1471 return Changed; 1472 } 1473 1474 1475 // 1476 // Copy generation 1477 // 1478 // Identify pairs of available registers which hold identical values. 1479 // In such cases, only one of them needs to be calculated, the other one 1480 // will be defined as a copy of the first. 1481 // 1482 // Copy propagation 1483 // 1484 // Eliminate register copies RD = RS, by replacing the uses of RD with 1485 // with uses of RS. 1486 namespace { 1487 class CopyGeneration : public Transformation { 1488 public: 1489 CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, 1490 const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) 1491 : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} 1492 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1493 private: 1494 bool findMatch(const BitTracker::RegisterRef &Inp, 1495 BitTracker::RegisterRef &Out, const RegisterSet &AVs); 1496 1497 const HexagonInstrInfo &HII; 1498 const HexagonRegisterInfo &HRI; 1499 MachineRegisterInfo &MRI; 1500 BitTracker &BT; 1501 RegisterSet Forbidden; 1502 }; 1503 1504 class CopyPropagation : public Transformation { 1505 public: 1506 CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) 1507 : Transformation(false), HRI(hri), MRI(mri) {} 1508 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1509 static bool isCopyReg(unsigned Opc, bool NoConv); 1510 private: 1511 bool propagateRegCopy(MachineInstr &MI); 1512 1513 const HexagonRegisterInfo &HRI; 1514 MachineRegisterInfo &MRI; 1515 }; 1516 1517 } 1518 1519 1520 /// Check if there is a register in AVs that is identical to Inp. If so, 1521 /// set Out to the found register. The output may be a pair Reg:Sub. 1522 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, 1523 BitTracker::RegisterRef &Out, const RegisterSet &AVs) { 1524 if (!BT.has(Inp.Reg)) 1525 return false; 1526 const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); 1527 auto *FRC = HBS::getFinalVRegClass(Inp, MRI); 1528 unsigned B, W; 1529 if (!HBS::getSubregMask(Inp, B, W, MRI)) 1530 return false; 1531 1532 for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) { 1533 if (!BT.has(R) || Forbidden[R]) 1534 continue; 1535 const BitTracker::RegisterCell &RC = BT.lookup(R); 1536 unsigned RW = RC.width(); 1537 if (W == RW) { 1538 if (FRC != MRI.getRegClass(R)) 1539 continue; 1540 if (!HBS::isTransparentCopy(R, Inp, MRI)) 1541 continue; 1542 if (!HBS::isEqual(InpRC, B, RC, 0, W)) 1543 continue; 1544 Out.Reg = R; 1545 Out.Sub = 0; 1546 return true; 1547 } 1548 // Check if there is a super-register, whose part (with a subregister) 1549 // is equal to the input. 1550 // Only do double registers for now. 1551 if (W*2 != RW) 1552 continue; 1553 if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass) 1554 continue; 1555 1556 if (HBS::isEqual(InpRC, B, RC, 0, W)) 1557 Out.Sub = Hexagon::isub_lo; 1558 else if (HBS::isEqual(InpRC, B, RC, W, W)) 1559 Out.Sub = Hexagon::isub_hi; 1560 else 1561 continue; 1562 Out.Reg = R; 1563 if (HBS::isTransparentCopy(Out, Inp, MRI)) 1564 return true; 1565 } 1566 return false; 1567 } 1568 1569 1570 bool CopyGeneration::processBlock(MachineBasicBlock &B, 1571 const RegisterSet &AVs) { 1572 if (!BT.reached(&B)) 1573 return false; 1574 RegisterSet AVB(AVs); 1575 bool Changed = false; 1576 RegisterSet Defs; 1577 1578 for (auto I = B.begin(), E = B.end(), NextI = I; I != E; 1579 ++I, AVB.insert(Defs)) { 1580 NextI = std::next(I); 1581 Defs.clear(); 1582 HBS::getInstrDefs(*I, Defs); 1583 1584 unsigned Opc = I->getOpcode(); 1585 if (CopyPropagation::isCopyReg(Opc, false) || 1586 ConstGeneration::isTfrConst(*I)) 1587 continue; 1588 1589 DebugLoc DL = I->getDebugLoc(); 1590 auto At = I->isPHI() ? B.getFirstNonPHI() : I; 1591 1592 for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) { 1593 BitTracker::RegisterRef MR; 1594 auto *FRC = HBS::getFinalVRegClass(R, MRI); 1595 1596 if (findMatch(R, MR, AVB)) { 1597 unsigned NewR = MRI.createVirtualRegister(FRC); 1598 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) 1599 .addReg(MR.Reg, 0, MR.Sub); 1600 BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); 1601 HBS::replaceReg(R, NewR, MRI); 1602 Forbidden.insert(R); 1603 continue; 1604 } 1605 1606 if (FRC == &Hexagon::DoubleRegsRegClass || 1607 FRC == &Hexagon::VecDblRegsRegClass || 1608 FRC == &Hexagon::VecDblRegs128BRegClass) { 1609 // Try to generate REG_SEQUENCE. 1610 unsigned SubLo = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_lo); 1611 unsigned SubHi = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_hi); 1612 BitTracker::RegisterRef TL = { R, SubLo }; 1613 BitTracker::RegisterRef TH = { R, SubHi }; 1614 BitTracker::RegisterRef ML, MH; 1615 if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { 1616 auto *FRC = HBS::getFinalVRegClass(R, MRI); 1617 unsigned NewR = MRI.createVirtualRegister(FRC); 1618 BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) 1619 .addReg(ML.Reg, 0, ML.Sub) 1620 .addImm(SubLo) 1621 .addReg(MH.Reg, 0, MH.Sub) 1622 .addImm(SubHi); 1623 BT.put(BitTracker::RegisterRef(NewR), BT.get(R)); 1624 HBS::replaceReg(R, NewR, MRI); 1625 Forbidden.insert(R); 1626 } 1627 } 1628 } 1629 } 1630 1631 return Changed; 1632 } 1633 1634 1635 bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { 1636 switch (Opc) { 1637 case TargetOpcode::COPY: 1638 case TargetOpcode::REG_SEQUENCE: 1639 case Hexagon::A4_combineir: 1640 case Hexagon::A4_combineri: 1641 return true; 1642 case Hexagon::A2_tfr: 1643 case Hexagon::A2_tfrp: 1644 case Hexagon::A2_combinew: 1645 case Hexagon::V6_vcombine: 1646 case Hexagon::V6_vcombine_128B: 1647 return NoConv; 1648 default: 1649 break; 1650 } 1651 return false; 1652 } 1653 1654 1655 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { 1656 bool Changed = false; 1657 unsigned Opc = MI.getOpcode(); 1658 BitTracker::RegisterRef RD = MI.getOperand(0); 1659 assert(MI.getOperand(0).getSubReg() == 0); 1660 1661 switch (Opc) { 1662 case TargetOpcode::COPY: 1663 case Hexagon::A2_tfr: 1664 case Hexagon::A2_tfrp: { 1665 BitTracker::RegisterRef RS = MI.getOperand(1); 1666 if (!HBS::isTransparentCopy(RD, RS, MRI)) 1667 break; 1668 if (RS.Sub != 0) 1669 Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); 1670 else 1671 Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); 1672 break; 1673 } 1674 case TargetOpcode::REG_SEQUENCE: { 1675 BitTracker::RegisterRef SL, SH; 1676 if (HBS::parseRegSequence(MI, SL, SH, MRI)) { 1677 const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg); 1678 unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); 1679 unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); 1680 Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI); 1681 Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI); 1682 } 1683 break; 1684 } 1685 case Hexagon::A2_combinew: 1686 case Hexagon::V6_vcombine: 1687 case Hexagon::V6_vcombine_128B: { 1688 const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg); 1689 unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); 1690 unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); 1691 BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); 1692 Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI); 1693 Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI); 1694 break; 1695 } 1696 case Hexagon::A4_combineir: 1697 case Hexagon::A4_combineri: { 1698 unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; 1699 unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo 1700 : Hexagon::isub_hi; 1701 BitTracker::RegisterRef RS = MI.getOperand(SrcX); 1702 Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); 1703 break; 1704 } 1705 } 1706 return Changed; 1707 } 1708 1709 1710 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { 1711 std::vector<MachineInstr*> Instrs; 1712 for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) 1713 Instrs.push_back(&*I); 1714 1715 bool Changed = false; 1716 for (auto I : Instrs) { 1717 unsigned Opc = I->getOpcode(); 1718 if (!CopyPropagation::isCopyReg(Opc, true)) 1719 continue; 1720 Changed |= propagateRegCopy(*I); 1721 } 1722 1723 return Changed; 1724 } 1725 1726 1727 // 1728 // Bit simplification 1729 // 1730 // Recognize patterns that can be simplified and replace them with the 1731 // simpler forms. 1732 // This is by no means complete 1733 namespace { 1734 class BitSimplification : public Transformation { 1735 public: 1736 BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, 1737 const HexagonRegisterInfo &hri, MachineRegisterInfo &mri, 1738 MachineFunction &mf) 1739 : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {} 1740 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; 1741 private: 1742 struct RegHalf : public BitTracker::RegisterRef { 1743 bool Low; // Low/High halfword. 1744 }; 1745 1746 bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, 1747 unsigned B, RegHalf &RH); 1748 bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); 1749 1750 bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, 1751 BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); 1752 unsigned getCombineOpcode(bool HLow, bool LLow); 1753 1754 bool genStoreUpperHalf(MachineInstr *MI); 1755 bool genStoreImmediate(MachineInstr *MI); 1756 bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, 1757 const BitTracker::RegisterCell &RC); 1758 bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, 1759 const BitTracker::RegisterCell &RC); 1760 bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, 1761 const BitTracker::RegisterCell &RC); 1762 bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, 1763 const BitTracker::RegisterCell &RC); 1764 bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, 1765 const BitTracker::RegisterCell &RC); 1766 1767 const HexagonInstrInfo &HII; 1768 const HexagonRegisterInfo &HRI; 1769 MachineRegisterInfo &MRI; 1770 MachineFunction &MF; 1771 BitTracker &BT; 1772 }; 1773 } 1774 1775 1776 // Check if the bits [B..B+16) in register cell RC form a valid halfword, 1777 // i.e. [0..16), [16..32), etc. of some register. If so, return true and 1778 // set the information about the found register in RH. 1779 bool BitSimplification::matchHalf(unsigned SelfR, 1780 const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { 1781 // XXX This could be searching in the set of available registers, in case 1782 // the match is not exact. 1783 1784 // Match 16-bit chunks, where the RC[B..B+15] references exactly one 1785 // register and all the bits B..B+15 match between RC and the register. 1786 // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, 1787 // and RC = { [0]:0 [1-15]:v1[1-15]... }. 1788 bool Low = false; 1789 unsigned I = B; 1790 while (I < B+16 && RC[I].num()) 1791 I++; 1792 if (I == B+16) 1793 return false; 1794 1795 unsigned Reg = RC[I].RefI.Reg; 1796 unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. 1797 if (P < I-B) 1798 return false; 1799 unsigned Pos = P - (I-B); 1800 1801 if (Reg == 0 || Reg == SelfR) // Don't match "self". 1802 return false; 1803 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1804 return false; 1805 if (!BT.has(Reg)) 1806 return false; 1807 1808 const BitTracker::RegisterCell &SC = BT.lookup(Reg); 1809 if (Pos+16 > SC.width()) 1810 return false; 1811 1812 for (unsigned i = 0; i < 16; ++i) { 1813 const BitTracker::BitValue &RV = RC[i+B]; 1814 if (RV.Type == BitTracker::BitValue::Ref) { 1815 if (RV.RefI.Reg != Reg) 1816 return false; 1817 if (RV.RefI.Pos != i+Pos) 1818 return false; 1819 continue; 1820 } 1821 if (RC[i+B] != SC[i+Pos]) 1822 return false; 1823 } 1824 1825 unsigned Sub = 0; 1826 switch (Pos) { 1827 case 0: 1828 Sub = Hexagon::isub_lo; 1829 Low = true; 1830 break; 1831 case 16: 1832 Sub = Hexagon::isub_lo; 1833 Low = false; 1834 break; 1835 case 32: 1836 Sub = Hexagon::isub_hi; 1837 Low = true; 1838 break; 1839 case 48: 1840 Sub = Hexagon::isub_hi; 1841 Low = false; 1842 break; 1843 default: 1844 return false; 1845 } 1846 1847 RH.Reg = Reg; 1848 RH.Sub = Sub; 1849 RH.Low = Low; 1850 // If the subregister is not valid with the register, set it to 0. 1851 if (!HBS::getFinalVRegClass(RH, MRI)) 1852 RH.Sub = 0; 1853 1854 return true; 1855 } 1856 1857 1858 bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, 1859 unsigned OpNum) { 1860 auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF); 1861 auto *RRC = HBS::getFinalVRegClass(R, MRI); 1862 return OpRC->hasSubClassEq(RRC); 1863 } 1864 1865 1866 // Check if RC matches the pattern of a S2_packhl. If so, return true and 1867 // set the inputs Rs and Rt. 1868 bool BitSimplification::matchPackhl(unsigned SelfR, 1869 const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, 1870 BitTracker::RegisterRef &Rt) { 1871 RegHalf L1, H1, L2, H2; 1872 1873 if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1)) 1874 return false; 1875 if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1)) 1876 return false; 1877 1878 // Rs = H1.L1, Rt = H2.L2 1879 if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) 1880 return false; 1881 if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) 1882 return false; 1883 1884 Rs = H1; 1885 Rt = H2; 1886 return true; 1887 } 1888 1889 1890 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { 1891 return HLow ? LLow ? Hexagon::A2_combine_ll 1892 : Hexagon::A2_combine_lh 1893 : LLow ? Hexagon::A2_combine_hl 1894 : Hexagon::A2_combine_hh; 1895 } 1896 1897 1898 // If MI stores the upper halfword of a register (potentially obtained via 1899 // shifts or extracts), replace it with a storerf instruction. This could 1900 // cause the "extraction" code to become dead. 1901 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { 1902 unsigned Opc = MI->getOpcode(); 1903 if (Opc != Hexagon::S2_storerh_io) 1904 return false; 1905 1906 MachineOperand &ValOp = MI->getOperand(2); 1907 BitTracker::RegisterRef RS = ValOp; 1908 if (!BT.has(RS.Reg)) 1909 return false; 1910 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1911 RegHalf H; 1912 if (!matchHalf(0, RC, 0, H)) 1913 return false; 1914 if (H.Low) 1915 return false; 1916 MI->setDesc(HII.get(Hexagon::S2_storerf_io)); 1917 ValOp.setReg(H.Reg); 1918 ValOp.setSubReg(H.Sub); 1919 return true; 1920 } 1921 1922 1923 // If MI stores a value known at compile-time, and the value is within a range 1924 // that avoids using constant-extenders, replace it with a store-immediate. 1925 bool BitSimplification::genStoreImmediate(MachineInstr *MI) { 1926 unsigned Opc = MI->getOpcode(); 1927 unsigned Align = 0; 1928 switch (Opc) { 1929 case Hexagon::S2_storeri_io: 1930 Align++; 1931 case Hexagon::S2_storerh_io: 1932 Align++; 1933 case Hexagon::S2_storerb_io: 1934 break; 1935 default: 1936 return false; 1937 } 1938 1939 // Avoid stores to frame-indices (due to an unknown offset). 1940 if (!MI->getOperand(0).isReg()) 1941 return false; 1942 MachineOperand &OffOp = MI->getOperand(1); 1943 if (!OffOp.isImm()) 1944 return false; 1945 1946 int64_t Off = OffOp.getImm(); 1947 // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). 1948 if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1))) 1949 return false; 1950 // Source register: 1951 BitTracker::RegisterRef RS = MI->getOperand(2); 1952 if (!BT.has(RS.Reg)) 1953 return false; 1954 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1955 uint64_t U; 1956 if (!HBS::getConst(RC, 0, RC.width(), U)) 1957 return false; 1958 1959 // Only consider 8-bit values to avoid constant-extenders. 1960 int V; 1961 switch (Opc) { 1962 case Hexagon::S2_storerb_io: 1963 V = int8_t(U); 1964 break; 1965 case Hexagon::S2_storerh_io: 1966 V = int16_t(U); 1967 break; 1968 case Hexagon::S2_storeri_io: 1969 V = int32_t(U); 1970 break; 1971 } 1972 if (!isInt<8>(V)) 1973 return false; 1974 1975 MI->RemoveOperand(2); 1976 switch (Opc) { 1977 case Hexagon::S2_storerb_io: 1978 MI->setDesc(HII.get(Hexagon::S4_storeirb_io)); 1979 break; 1980 case Hexagon::S2_storerh_io: 1981 MI->setDesc(HII.get(Hexagon::S4_storeirh_io)); 1982 break; 1983 case Hexagon::S2_storeri_io: 1984 MI->setDesc(HII.get(Hexagon::S4_storeiri_io)); 1985 break; 1986 } 1987 MI->addOperand(MachineOperand::CreateImm(V)); 1988 return true; 1989 } 1990 1991 1992 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the 1993 // last instruction in a sequence that results in something equivalent to 1994 // the pack-halfwords. The intent is to cause the entire sequence to become 1995 // dead. 1996 bool BitSimplification::genPackhl(MachineInstr *MI, 1997 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 1998 unsigned Opc = MI->getOpcode(); 1999 if (Opc == Hexagon::S2_packhl) 2000 return false; 2001 BitTracker::RegisterRef Rs, Rt; 2002 if (!matchPackhl(RD.Reg, RC, Rs, Rt)) 2003 return false; 2004 if (!validateReg(Rs, Hexagon::S2_packhl, 1) || 2005 !validateReg(Rt, Hexagon::S2_packhl, 2)) 2006 return false; 2007 2008 MachineBasicBlock &B = *MI->getParent(); 2009 unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); 2010 DebugLoc DL = MI->getDebugLoc(); 2011 auto At = MI->isPHI() ? B.getFirstNonPHI() 2012 : MachineBasicBlock::iterator(MI); 2013 BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR) 2014 .addReg(Rs.Reg, 0, Rs.Sub) 2015 .addReg(Rt.Reg, 0, Rt.Sub); 2016 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2017 BT.put(BitTracker::RegisterRef(NewR), RC); 2018 return true; 2019 } 2020 2021 2022 // If MI produces halfword of the input in the low half of the output, 2023 // replace it with zero-extend or extractu. 2024 bool BitSimplification::genExtractHalf(MachineInstr *MI, 2025 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2026 RegHalf L; 2027 // Check for halfword in low 16 bits, zeros elsewhere. 2028 if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16)) 2029 return false; 2030 2031 unsigned Opc = MI->getOpcode(); 2032 MachineBasicBlock &B = *MI->getParent(); 2033 DebugLoc DL = MI->getDebugLoc(); 2034 2035 // Prefer zxth, since zxth can go in any slot, while extractu only in 2036 // slots 2 and 3. 2037 unsigned NewR = 0; 2038 auto At = MI->isPHI() ? B.getFirstNonPHI() 2039 : MachineBasicBlock::iterator(MI); 2040 if (L.Low && Opc != Hexagon::A2_zxth) { 2041 if (validateReg(L, Hexagon::A2_zxth, 1)) { 2042 NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2043 BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) 2044 .addReg(L.Reg, 0, L.Sub); 2045 } 2046 } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) { 2047 if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) { 2048 NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2049 BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR) 2050 .addReg(L.Reg, 0, L.Sub) 2051 .addImm(16); 2052 } 2053 } 2054 if (NewR == 0) 2055 return false; 2056 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2057 BT.put(BitTracker::RegisterRef(NewR), RC); 2058 return true; 2059 } 2060 2061 2062 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the 2063 // combine. 2064 bool BitSimplification::genCombineHalf(MachineInstr *MI, 2065 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2066 RegHalf L, H; 2067 // Check for combine h/l 2068 if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H)) 2069 return false; 2070 // Do nothing if this is just a reg copy. 2071 if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low) 2072 return false; 2073 2074 unsigned Opc = MI->getOpcode(); 2075 unsigned COpc = getCombineOpcode(H.Low, L.Low); 2076 if (COpc == Opc) 2077 return false; 2078 if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2)) 2079 return false; 2080 2081 MachineBasicBlock &B = *MI->getParent(); 2082 DebugLoc DL = MI->getDebugLoc(); 2083 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2084 auto At = MI->isPHI() ? B.getFirstNonPHI() 2085 : MachineBasicBlock::iterator(MI); 2086 BuildMI(B, At, DL, HII.get(COpc), NewR) 2087 .addReg(H.Reg, 0, H.Sub) 2088 .addReg(L.Reg, 0, L.Sub); 2089 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2090 BT.put(BitTracker::RegisterRef(NewR), RC); 2091 return true; 2092 } 2093 2094 2095 // If MI resets high bits of a register and keeps the lower ones, replace it 2096 // with zero-extend byte/half, and-immediate, or extractu, as appropriate. 2097 bool BitSimplification::genExtractLow(MachineInstr *MI, 2098 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2099 unsigned Opc = MI->getOpcode(); 2100 switch (Opc) { 2101 case Hexagon::A2_zxtb: 2102 case Hexagon::A2_zxth: 2103 case Hexagon::S2_extractu: 2104 return false; 2105 } 2106 if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { 2107 int32_t Imm = MI->getOperand(2).getImm(); 2108 if (isInt<10>(Imm)) 2109 return false; 2110 } 2111 2112 if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) 2113 return false; 2114 unsigned W = RC.width(); 2115 while (W > 0 && RC[W-1].is(0)) 2116 W--; 2117 if (W == 0 || W == RC.width()) 2118 return false; 2119 unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb 2120 : (W == 16) ? Hexagon::A2_zxth 2121 : (W < 10) ? Hexagon::A2_andir 2122 : Hexagon::S2_extractu; 2123 MachineBasicBlock &B = *MI->getParent(); 2124 DebugLoc DL = MI->getDebugLoc(); 2125 2126 for (auto &Op : MI->uses()) { 2127 if (!Op.isReg()) 2128 continue; 2129 BitTracker::RegisterRef RS = Op; 2130 if (!BT.has(RS.Reg)) 2131 continue; 2132 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 2133 unsigned BN, BW; 2134 if (!HBS::getSubregMask(RS, BN, BW, MRI)) 2135 continue; 2136 if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) 2137 continue; 2138 if (!validateReg(RS, NewOpc, 1)) 2139 continue; 2140 2141 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2142 auto At = MI->isPHI() ? B.getFirstNonPHI() 2143 : MachineBasicBlock::iterator(MI); 2144 auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) 2145 .addReg(RS.Reg, 0, RS.Sub); 2146 if (NewOpc == Hexagon::A2_andir) 2147 MIB.addImm((1 << W) - 1); 2148 else if (NewOpc == Hexagon::S2_extractu) 2149 MIB.addImm(W).addImm(0); 2150 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2151 BT.put(BitTracker::RegisterRef(NewR), RC); 2152 return true; 2153 } 2154 return false; 2155 } 2156 2157 2158 // Check for tstbit simplification opportunity, where the bit being checked 2159 // can be tracked back to another register. For example: 2160 // vreg2 = S2_lsr_i_r vreg1, 5 2161 // vreg3 = S2_tstbit_i vreg2, 0 2162 // => 2163 // vreg3 = S2_tstbit_i vreg1, 5 2164 bool BitSimplification::simplifyTstbit(MachineInstr *MI, 2165 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2166 unsigned Opc = MI->getOpcode(); 2167 if (Opc != Hexagon::S2_tstbit_i) 2168 return false; 2169 2170 unsigned BN = MI->getOperand(2).getImm(); 2171 BitTracker::RegisterRef RS = MI->getOperand(1); 2172 unsigned F, W; 2173 DebugLoc DL = MI->getDebugLoc(); 2174 if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) 2175 return false; 2176 MachineBasicBlock &B = *MI->getParent(); 2177 auto At = MI->isPHI() ? B.getFirstNonPHI() 2178 : MachineBasicBlock::iterator(MI); 2179 2180 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 2181 const BitTracker::BitValue &V = SC[F+BN]; 2182 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { 2183 const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); 2184 // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is 2185 // a double register, need to use a subregister and adjust bit 2186 // number. 2187 unsigned P = UINT_MAX; 2188 BitTracker::RegisterRef RR(V.RefI.Reg, 0); 2189 if (TC == &Hexagon::DoubleRegsRegClass) { 2190 P = V.RefI.Pos; 2191 RR.Sub = Hexagon::isub_lo; 2192 if (P >= 32) { 2193 P -= 32; 2194 RR.Sub = Hexagon::isub_hi; 2195 } 2196 } else if (TC == &Hexagon::IntRegsRegClass) { 2197 P = V.RefI.Pos; 2198 } 2199 if (P != UINT_MAX) { 2200 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); 2201 BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) 2202 .addReg(RR.Reg, 0, RR.Sub) 2203 .addImm(P); 2204 HBS::replaceReg(RD.Reg, NewR, MRI); 2205 BT.put(NewR, RC); 2206 return true; 2207 } 2208 } else if (V.is(0) || V.is(1)) { 2209 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); 2210 unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true; 2211 BuildMI(B, At, DL, HII.get(NewOpc), NewR); 2212 HBS::replaceReg(RD.Reg, NewR, MRI); 2213 return true; 2214 } 2215 2216 return false; 2217 } 2218 2219 2220 bool BitSimplification::processBlock(MachineBasicBlock &B, 2221 const RegisterSet &AVs) { 2222 if (!BT.reached(&B)) 2223 return false; 2224 bool Changed = false; 2225 RegisterSet AVB = AVs; 2226 RegisterSet Defs; 2227 2228 for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) { 2229 MachineInstr *MI = &*I; 2230 Defs.clear(); 2231 HBS::getInstrDefs(*MI, Defs); 2232 2233 unsigned Opc = MI->getOpcode(); 2234 if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) 2235 continue; 2236 2237 if (MI->mayStore()) { 2238 bool T = genStoreUpperHalf(MI); 2239 T = T || genStoreImmediate(MI); 2240 Changed |= T; 2241 continue; 2242 } 2243 2244 if (Defs.count() != 1) 2245 continue; 2246 const MachineOperand &Op0 = MI->getOperand(0); 2247 if (!Op0.isReg() || !Op0.isDef()) 2248 continue; 2249 BitTracker::RegisterRef RD = Op0; 2250 if (!BT.has(RD.Reg)) 2251 continue; 2252 const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); 2253 const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg); 2254 2255 if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { 2256 bool T = genPackhl(MI, RD, RC); 2257 Changed |= T; 2258 continue; 2259 } 2260 2261 if (FRC->getID() == Hexagon::IntRegsRegClassID) { 2262 bool T = genExtractHalf(MI, RD, RC); 2263 T = T || genCombineHalf(MI, RD, RC); 2264 T = T || genExtractLow(MI, RD, RC); 2265 Changed |= T; 2266 continue; 2267 } 2268 2269 if (FRC->getID() == Hexagon::PredRegsRegClassID) { 2270 bool T = simplifyTstbit(MI, RD, RC); 2271 Changed |= T; 2272 continue; 2273 } 2274 } 2275 return Changed; 2276 } 2277 2278 2279 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { 2280 if (skipFunction(*MF.getFunction())) 2281 return false; 2282 2283 auto &HST = MF.getSubtarget<HexagonSubtarget>(); 2284 auto &HRI = *HST.getRegisterInfo(); 2285 auto &HII = *HST.getInstrInfo(); 2286 2287 MDT = &getAnalysis<MachineDominatorTree>(); 2288 MachineRegisterInfo &MRI = MF.getRegInfo(); 2289 bool Changed; 2290 2291 Changed = DeadCodeElimination(MF, *MDT).run(); 2292 2293 const HexagonEvaluator HE(HRI, MRI, HII, MF); 2294 BitTracker BT(HE, MF); 2295 DEBUG(BT.trace(true)); 2296 BT.run(); 2297 2298 MachineBasicBlock &Entry = MF.front(); 2299 2300 RegisterSet AIG; // Available registers for IG. 2301 ConstGeneration ImmG(BT, HII, MRI); 2302 Changed |= visitBlock(Entry, ImmG, AIG); 2303 2304 RegisterSet ARE; // Available registers for RIE. 2305 RedundantInstrElimination RIE(BT, HII, MRI); 2306 bool Ried = visitBlock(Entry, RIE, ARE); 2307 if (Ried) { 2308 Changed = true; 2309 BT.run(); 2310 } 2311 2312 RegisterSet ACG; // Available registers for CG. 2313 CopyGeneration CopyG(BT, HII, HRI, MRI); 2314 Changed |= visitBlock(Entry, CopyG, ACG); 2315 2316 RegisterSet ACP; // Available registers for CP. 2317 CopyPropagation CopyP(HRI, MRI); 2318 Changed |= visitBlock(Entry, CopyP, ACP); 2319 2320 Changed = DeadCodeElimination(MF, *MDT).run() || Changed; 2321 2322 BT.run(); 2323 RegisterSet ABS; // Available registers for BS. 2324 BitSimplification BitS(BT, HII, HRI, MRI, MF); 2325 Changed |= visitBlock(Entry, BitS, ABS); 2326 2327 Changed = DeadCodeElimination(MF, *MDT).run() || Changed; 2328 2329 if (Changed) { 2330 for (auto &B : MF) 2331 for (auto &I : B) 2332 I.clearKillInfo(); 2333 DeadCodeElimination(MF, *MDT).run(); 2334 } 2335 return Changed; 2336 } 2337 2338 2339 // Recognize loops where the code at the end of the loop matches the code 2340 // before the entry of the loop, and the matching code is such that is can 2341 // be simplified. This pass relies on the bit simplification above and only 2342 // prepares code in a way that can be handled by the bit simplifcation. 2343 // 2344 // This is the motivating testcase (and explanation): 2345 // 2346 // { 2347 // loop0(.LBB0_2, r1) // %for.body.preheader 2348 // r5:4 = memd(r0++#8) 2349 // } 2350 // { 2351 // r3 = lsr(r4, #16) 2352 // r7:6 = combine(r5, r5) 2353 // } 2354 // { 2355 // r3 = insert(r5, #16, #16) 2356 // r7:6 = vlsrw(r7:6, #16) 2357 // } 2358 // .LBB0_2: 2359 // { 2360 // memh(r2+#4) = r5 2361 // memh(r2+#6) = r6 # R6 is really R5.H 2362 // } 2363 // { 2364 // r2 = add(r2, #8) 2365 // memh(r2+#0) = r4 2366 // memh(r2+#2) = r3 # R3 is really R4.H 2367 // } 2368 // { 2369 // r5:4 = memd(r0++#8) 2370 // } 2371 // { # "Shuffling" code that sets up R3 and R6 2372 // r3 = lsr(r4, #16) # so that their halves can be stored in the 2373 // r7:6 = combine(r5, r5) # next iteration. This could be folded into 2374 // } # the stores if the code was at the beginning 2375 // { # of the loop iteration. Since the same code 2376 // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved 2377 // r7:6 = vlsrw(r7:6, #16) # there. 2378 // }:endloop0 2379 // 2380 // 2381 // The outcome: 2382 // 2383 // { 2384 // loop0(.LBB0_2, r1) 2385 // r5:4 = memd(r0++#8) 2386 // } 2387 // .LBB0_2: 2388 // { 2389 // memh(r2+#4) = r5 2390 // memh(r2+#6) = r5.h 2391 // } 2392 // { 2393 // r2 = add(r2, #8) 2394 // memh(r2+#0) = r4 2395 // memh(r2+#2) = r4.h 2396 // } 2397 // { 2398 // r5:4 = memd(r0++#8) 2399 // }:endloop0 2400 2401 namespace llvm { 2402 FunctionPass *createHexagonLoopRescheduling(); 2403 void initializeHexagonLoopReschedulingPass(PassRegistry&); 2404 } 2405 2406 namespace { 2407 class HexagonLoopRescheduling : public MachineFunctionPass { 2408 public: 2409 static char ID; 2410 HexagonLoopRescheduling() : MachineFunctionPass(ID), 2411 HII(0), HRI(0), MRI(0), BTP(0) { 2412 initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); 2413 } 2414 2415 bool runOnMachineFunction(MachineFunction &MF) override; 2416 2417 private: 2418 const HexagonInstrInfo *HII; 2419 const HexagonRegisterInfo *HRI; 2420 MachineRegisterInfo *MRI; 2421 BitTracker *BTP; 2422 2423 struct LoopCand { 2424 LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, 2425 MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} 2426 MachineBasicBlock *LB, *PB, *EB; 2427 }; 2428 typedef std::vector<MachineInstr*> InstrList; 2429 struct InstrGroup { 2430 BitTracker::RegisterRef Inp, Out; 2431 InstrList Ins; 2432 }; 2433 struct PhiInfo { 2434 PhiInfo(MachineInstr &P, MachineBasicBlock &B); 2435 unsigned DefR; 2436 BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register 2437 MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block 2438 }; 2439 2440 static unsigned getDefReg(const MachineInstr *MI); 2441 bool isConst(unsigned Reg) const; 2442 bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; 2443 bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; 2444 bool isShuffleOf(unsigned OutR, unsigned InpR) const; 2445 bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, 2446 unsigned &InpR2) const; 2447 void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, 2448 MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); 2449 bool processLoop(LoopCand &C); 2450 }; 2451 } 2452 2453 char HexagonLoopRescheduling::ID = 0; 2454 2455 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", 2456 "Hexagon Loop Rescheduling", false, false) 2457 2458 2459 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, 2460 MachineBasicBlock &B) { 2461 DefR = HexagonLoopRescheduling::getDefReg(&P); 2462 LB = &B; 2463 PB = nullptr; 2464 for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { 2465 const MachineOperand &OpB = P.getOperand(i+1); 2466 if (OpB.getMBB() == &B) { 2467 LR = P.getOperand(i); 2468 continue; 2469 } 2470 PB = OpB.getMBB(); 2471 PR = P.getOperand(i); 2472 } 2473 } 2474 2475 2476 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { 2477 RegisterSet Defs; 2478 HBS::getInstrDefs(*MI, Defs); 2479 if (Defs.count() != 1) 2480 return 0; 2481 return Defs.find_first(); 2482 } 2483 2484 2485 bool HexagonLoopRescheduling::isConst(unsigned Reg) const { 2486 if (!BTP->has(Reg)) 2487 return false; 2488 const BitTracker::RegisterCell &RC = BTP->lookup(Reg); 2489 for (unsigned i = 0, w = RC.width(); i < w; ++i) { 2490 const BitTracker::BitValue &V = RC[i]; 2491 if (!V.is(0) && !V.is(1)) 2492 return false; 2493 } 2494 return true; 2495 } 2496 2497 2498 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, 2499 unsigned DefR) const { 2500 unsigned Opc = MI->getOpcode(); 2501 switch (Opc) { 2502 case TargetOpcode::COPY: 2503 case Hexagon::S2_lsr_i_r: 2504 case Hexagon::S2_asr_i_r: 2505 case Hexagon::S2_asl_i_r: 2506 case Hexagon::S2_lsr_i_p: 2507 case Hexagon::S2_asr_i_p: 2508 case Hexagon::S2_asl_i_p: 2509 case Hexagon::S2_insert: 2510 case Hexagon::A2_or: 2511 case Hexagon::A2_orp: 2512 case Hexagon::A2_and: 2513 case Hexagon::A2_andp: 2514 case Hexagon::A2_combinew: 2515 case Hexagon::A4_combineri: 2516 case Hexagon::A4_combineir: 2517 case Hexagon::A2_combineii: 2518 case Hexagon::A4_combineii: 2519 case Hexagon::A2_combine_ll: 2520 case Hexagon::A2_combine_lh: 2521 case Hexagon::A2_combine_hl: 2522 case Hexagon::A2_combine_hh: 2523 return true; 2524 } 2525 return false; 2526 } 2527 2528 2529 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, 2530 unsigned InpR) const { 2531 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 2532 const MachineOperand &Op = MI->getOperand(i); 2533 if (!Op.isReg()) 2534 continue; 2535 if (Op.getReg() == InpR) 2536 return i == n-1; 2537 } 2538 return false; 2539 } 2540 2541 2542 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { 2543 if (!BTP->has(OutR) || !BTP->has(InpR)) 2544 return false; 2545 const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); 2546 for (unsigned i = 0, w = OutC.width(); i < w; ++i) { 2547 const BitTracker::BitValue &V = OutC[i]; 2548 if (V.Type != BitTracker::BitValue::Ref) 2549 continue; 2550 if (V.RefI.Reg != InpR) 2551 return false; 2552 } 2553 return true; 2554 } 2555 2556 2557 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, 2558 unsigned OutR2, unsigned &InpR2) const { 2559 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) 2560 return false; 2561 const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); 2562 const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); 2563 unsigned W = OutC1.width(); 2564 unsigned MatchR = 0; 2565 if (W != OutC2.width()) 2566 return false; 2567 for (unsigned i = 0; i < W; ++i) { 2568 const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; 2569 if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) 2570 return false; 2571 if (V1.Type != BitTracker::BitValue::Ref) 2572 continue; 2573 if (V1.RefI.Pos != V2.RefI.Pos) 2574 return false; 2575 if (V1.RefI.Reg != InpR1) 2576 return false; 2577 if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) 2578 return false; 2579 if (!MatchR) 2580 MatchR = V2.RefI.Reg; 2581 else if (V2.RefI.Reg != MatchR) 2582 return false; 2583 } 2584 InpR2 = MatchR; 2585 return true; 2586 } 2587 2588 2589 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, 2590 MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, 2591 unsigned NewPredR) { 2592 DenseMap<unsigned,unsigned> RegMap; 2593 2594 const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); 2595 unsigned PhiR = MRI->createVirtualRegister(PhiRC); 2596 BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) 2597 .addReg(NewPredR) 2598 .addMBB(&PB) 2599 .addReg(G.Inp.Reg) 2600 .addMBB(&LB); 2601 RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); 2602 2603 for (unsigned i = G.Ins.size(); i > 0; --i) { 2604 const MachineInstr *SI = G.Ins[i-1]; 2605 unsigned DR = getDefReg(SI); 2606 const TargetRegisterClass *RC = MRI->getRegClass(DR); 2607 unsigned NewDR = MRI->createVirtualRegister(RC); 2608 DebugLoc DL = SI->getDebugLoc(); 2609 2610 auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); 2611 for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { 2612 const MachineOperand &Op = SI->getOperand(j); 2613 if (!Op.isReg()) { 2614 MIB.addOperand(Op); 2615 continue; 2616 } 2617 if (!Op.isUse()) 2618 continue; 2619 unsigned UseR = RegMap[Op.getReg()]; 2620 MIB.addReg(UseR, 0, Op.getSubReg()); 2621 } 2622 RegMap.insert(std::make_pair(DR, NewDR)); 2623 } 2624 2625 HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); 2626 } 2627 2628 2629 bool HexagonLoopRescheduling::processLoop(LoopCand &C) { 2630 DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); 2631 std::vector<PhiInfo> Phis; 2632 for (auto &I : *C.LB) { 2633 if (!I.isPHI()) 2634 break; 2635 unsigned PR = getDefReg(&I); 2636 if (isConst(PR)) 2637 continue; 2638 bool BadUse = false, GoodUse = false; 2639 for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) { 2640 MachineInstr *UseI = UI->getParent(); 2641 if (UseI->getParent() != C.LB) { 2642 BadUse = true; 2643 break; 2644 } 2645 if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR)) 2646 GoodUse = true; 2647 } 2648 if (BadUse || !GoodUse) 2649 continue; 2650 2651 Phis.push_back(PhiInfo(I, *C.LB)); 2652 } 2653 2654 DEBUG({ 2655 dbgs() << "Phis: {"; 2656 for (auto &I : Phis) { 2657 dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" 2658 << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() 2659 << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" 2660 << I.LB->getNumber() << ')'; 2661 } 2662 dbgs() << " }\n"; 2663 }); 2664 2665 if (Phis.empty()) 2666 return false; 2667 2668 bool Changed = false; 2669 InstrList ShufIns; 2670 2671 // Go backwards in the block: for each bit shuffling instruction, check 2672 // if that instruction could potentially be moved to the front of the loop: 2673 // the output of the loop cannot be used in a non-shuffling instruction 2674 // in this loop. 2675 for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) { 2676 if (I->isTerminator()) 2677 continue; 2678 if (I->isPHI()) 2679 break; 2680 2681 RegisterSet Defs; 2682 HBS::getInstrDefs(*I, Defs); 2683 if (Defs.count() != 1) 2684 continue; 2685 unsigned DefR = Defs.find_first(); 2686 if (!TargetRegisterInfo::isVirtualRegister(DefR)) 2687 continue; 2688 if (!isBitShuffle(&*I, DefR)) 2689 continue; 2690 2691 bool BadUse = false; 2692 for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) { 2693 MachineInstr *UseI = UI->getParent(); 2694 if (UseI->getParent() == C.LB) { 2695 if (UseI->isPHI()) { 2696 // If the use is in a phi node in this loop, then it should be 2697 // the value corresponding to the back edge. 2698 unsigned Idx = UI.getOperandNo(); 2699 if (UseI->getOperand(Idx+1).getMBB() != C.LB) 2700 BadUse = true; 2701 } else { 2702 auto F = find(ShufIns, UseI); 2703 if (F == ShufIns.end()) 2704 BadUse = true; 2705 } 2706 } else { 2707 // There is a use outside of the loop, but there is no epilog block 2708 // suitable for a copy-out. 2709 if (C.EB == nullptr) 2710 BadUse = true; 2711 } 2712 if (BadUse) 2713 break; 2714 } 2715 2716 if (BadUse) 2717 continue; 2718 ShufIns.push_back(&*I); 2719 } 2720 2721 // Partition the list of shuffling instructions into instruction groups, 2722 // where each group has to be moved as a whole (i.e. a group is a chain of 2723 // dependent instructions). A group produces a single live output register, 2724 // which is meant to be the input of the loop phi node (although this is 2725 // not checked here yet). It also uses a single register as its input, 2726 // which is some value produced in the loop body. After moving the group 2727 // to the beginning of the loop, that input register would need to be 2728 // the loop-carried register (through a phi node) instead of the (currently 2729 // loop-carried) output register. 2730 typedef std::vector<InstrGroup> InstrGroupList; 2731 InstrGroupList Groups; 2732 2733 for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { 2734 MachineInstr *SI = ShufIns[i]; 2735 if (SI == nullptr) 2736 continue; 2737 2738 InstrGroup G; 2739 G.Ins.push_back(SI); 2740 G.Out.Reg = getDefReg(SI); 2741 RegisterSet Inputs; 2742 HBS::getInstrUses(*SI, Inputs); 2743 2744 for (unsigned j = i+1; j < n; ++j) { 2745 MachineInstr *MI = ShufIns[j]; 2746 if (MI == nullptr) 2747 continue; 2748 RegisterSet Defs; 2749 HBS::getInstrDefs(*MI, Defs); 2750 // If this instruction does not define any pending inputs, skip it. 2751 if (!Defs.intersects(Inputs)) 2752 continue; 2753 // Otherwise, add it to the current group and remove the inputs that 2754 // are defined by MI. 2755 G.Ins.push_back(MI); 2756 Inputs.remove(Defs); 2757 // Then add all registers used by MI. 2758 HBS::getInstrUses(*MI, Inputs); 2759 ShufIns[j] = nullptr; 2760 } 2761 2762 // Only add a group if it requires at most one register. 2763 if (Inputs.count() > 1) 2764 continue; 2765 auto LoopInpEq = [G] (const PhiInfo &P) -> bool { 2766 return G.Out.Reg == P.LR.Reg; 2767 }; 2768 if (find_if(Phis, LoopInpEq) == Phis.end()) 2769 continue; 2770 2771 G.Inp.Reg = Inputs.find_first(); 2772 Groups.push_back(G); 2773 } 2774 2775 DEBUG({ 2776 for (unsigned i = 0, n = Groups.size(); i < n; ++i) { 2777 InstrGroup &G = Groups[i]; 2778 dbgs() << "Group[" << i << "] inp: " 2779 << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub) 2780 << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; 2781 for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) 2782 dbgs() << " " << *G.Ins[j]; 2783 } 2784 }); 2785 2786 for (unsigned i = 0, n = Groups.size(); i < n; ++i) { 2787 InstrGroup &G = Groups[i]; 2788 if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) 2789 continue; 2790 auto LoopInpEq = [G] (const PhiInfo &P) -> bool { 2791 return G.Out.Reg == P.LR.Reg; 2792 }; 2793 auto F = find_if(Phis, LoopInpEq); 2794 if (F == Phis.end()) 2795 continue; 2796 unsigned PrehR = 0; 2797 if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) { 2798 const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg); 2799 unsigned Opc = DefPrehR->getOpcode(); 2800 if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) 2801 continue; 2802 if (!DefPrehR->getOperand(1).isImm()) 2803 continue; 2804 if (DefPrehR->getOperand(1).getImm() != 0) 2805 continue; 2806 const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); 2807 if (RC != MRI->getRegClass(F->PR.Reg)) { 2808 PrehR = MRI->createVirtualRegister(RC); 2809 unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi 2810 : Hexagon::A2_tfrpi; 2811 auto T = C.PB->getFirstTerminator(); 2812 DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc(); 2813 BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR) 2814 .addImm(0); 2815 } else { 2816 PrehR = F->PR.Reg; 2817 } 2818 } 2819 // isSameShuffle could match with PrehR being of a wider class than 2820 // G.Inp.Reg, for example if G shuffles the low 32 bits of its input, 2821 // it would match for the input being a 32-bit register, and PrehR 2822 // being a 64-bit register (where the low 32 bits match). This could 2823 // be handled, but for now skip these cases. 2824 if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg)) 2825 continue; 2826 moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR); 2827 Changed = true; 2828 } 2829 2830 return Changed; 2831 } 2832 2833 2834 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { 2835 if (skipFunction(*MF.getFunction())) 2836 return false; 2837 2838 auto &HST = MF.getSubtarget<HexagonSubtarget>(); 2839 HII = HST.getInstrInfo(); 2840 HRI = HST.getRegisterInfo(); 2841 MRI = &MF.getRegInfo(); 2842 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 2843 BitTracker BT(HE, MF); 2844 DEBUG(BT.trace(true)); 2845 BT.run(); 2846 BTP = &BT; 2847 2848 std::vector<LoopCand> Cand; 2849 2850 for (auto &B : MF) { 2851 if (B.pred_size() != 2 || B.succ_size() != 2) 2852 continue; 2853 MachineBasicBlock *PB = nullptr; 2854 bool IsLoop = false; 2855 for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) { 2856 if (*PI != &B) 2857 PB = *PI; 2858 else 2859 IsLoop = true; 2860 } 2861 if (!IsLoop) 2862 continue; 2863 2864 MachineBasicBlock *EB = nullptr; 2865 for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) { 2866 if (*SI == &B) 2867 continue; 2868 // Set EP to the epilog block, if it has only 1 predecessor (i.e. the 2869 // edge from B to EP is non-critical. 2870 if ((*SI)->pred_size() == 1) 2871 EB = *SI; 2872 break; 2873 } 2874 2875 Cand.push_back(LoopCand(&B, PB, EB)); 2876 } 2877 2878 bool Changed = false; 2879 for (auto &C : Cand) 2880 Changed |= processLoop(C); 2881 2882 return Changed; 2883 } 2884 2885 //===----------------------------------------------------------------------===// 2886 // Public Constructor Functions 2887 //===----------------------------------------------------------------------===// 2888 2889 FunctionPass *llvm::createHexagonLoopRescheduling() { 2890 return new HexagonLoopRescheduling(); 2891 } 2892 2893 FunctionPass *llvm::createHexagonBitSimplify() { 2894 return new HexagonBitSimplify(); 2895 } 2896 2897