//===--- HexagonBitSimplify.cpp -------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "hexbit"

#include "HexagonBitTracker.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

namespace llvm {
  void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
  FunctionPass *createHexagonBitSimplify();
}

namespace {
  // Set of virtual registers, based on BitVector.
  struct RegisterSet : private BitVector {
    RegisterSet() : BitVector() {}
    explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
    RegisterSet(const RegisterSet &RS) : BitVector(RS) {}

    using BitVector::clear;
    using BitVector::count;

    unsigned find_first() const {
      int First = BitVector::find_first();
      if (First < 0)
        return 0;
      return x2v(First);
    }

    unsigned find_next(unsigned Prev) const {
      int Next = BitVector::find_next(v2x(Prev));
      if (Next < 0)
        return 0;
      return x2v(Next);
    }

    RegisterSet &insert(unsigned R) {
      unsigned Idx = v2x(R);
      ensure(Idx);
      return static_cast<RegisterSet&>(BitVector::set(Idx));
    }
    RegisterSet &remove(unsigned R) {
      unsigned Idx = v2x(R);
      if (Idx >= size())
        return *this;
      return static_cast<RegisterSet&>(BitVector::reset(Idx));
    }

    RegisterSet &insert(const RegisterSet &Rs) {
      return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
    }
    RegisterSet &remove(const RegisterSet &Rs) {
      return static_cast<RegisterSet&>(BitVector::reset(Rs));
    }

    reference operator[](unsigned R) {
      unsigned Idx = v2x(R);
      ensure(Idx);
      return BitVector::operator[](Idx);
    }
    bool operator[](unsigned R) const {
      unsigned Idx = v2x(R);
      assert(Idx < size());
      return BitVector::operator[](Idx);
    }
    bool has(unsigned R) const {
      unsigned Idx = v2x(R);
      if (Idx >= size())
        return false;
      return BitVector::test(Idx);
    }

    bool empty() const {
      return !BitVector::any();
    }
    bool includes(const RegisterSet &Rs) const {
      // A.BitVector::test(B) <=> A-B != {}
      return !Rs.BitVector::test(*this);
    }
    bool intersects(const RegisterSet &Rs) const {
      return BitVector::anyCommon(Rs);
    }

  private:
    void ensure(unsigned Idx) {
      if (size() <= Idx)
        resize(std::max(Idx+1, 32U));
    }
    static inline unsigned v2x(unsigned v) {
      return TargetRegisterInfo::virtReg2Index(v);
    }
    static inline unsigned x2v(unsigned x) {
      return TargetRegisterInfo::index2VirtReg(x);
    }
  };


  struct PrintRegSet {
    PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
      : RS(S), TRI(RI) {}
    friend raw_ostream &operator<< (raw_ostream &OS,
          const PrintRegSet &P);
  private:
    const RegisterSet &RS;
    const TargetRegisterInfo *TRI;
  };

  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
      LLVM_ATTRIBUTE_UNUSED;
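  // Illustrative usage note (not part of the original file): PrintRegSet is a
  // stream adaptor, so a debug dump of an available-register set would look
  // something like
  //   DEBUG(dbgs() << "AVs: " << PrintRegSet(AVs, &TRI) << '\n');
  // where AVs and TRI are assumed to be in scope.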
  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
    OS << '{';
    for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
      OS << ' ' << PrintReg(R, P.TRI);
    OS << " }";
    return OS;
  }
}


namespace {
  class Transformation;

  class HexagonBitSimplify : public MachineFunctionPass {
  public:
    static char ID;
    HexagonBitSimplify() : MachineFunctionPass(ID), MDT(nullptr) {
      initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
    }
    virtual const char *getPassName() const {
      return "Hexagon bit simplification";
    }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
    virtual bool runOnMachineFunction(MachineFunction &MF);

    static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
    static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
    static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
        const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
    static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
        uint16_t W);
    static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
        uint16_t W);
    static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
        uint16_t W, uint64_t &U);
    static bool replaceReg(unsigned OldR, unsigned NewR,
        MachineRegisterInfo &MRI);
    static bool getSubregMask(const BitTracker::RegisterRef &RR,
        unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
    static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
        unsigned NewSR, MachineRegisterInfo &MRI);
    static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
        unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
    static bool parseRegSequence(const MachineInstr &I,
        BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);

    static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
        uint16_t Begin);
    static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
        uint16_t Begin, const HexagonInstrInfo &HII);

    static const TargetRegisterClass *getFinalVRegClass(
        const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
    static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
        const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);

  private:
    MachineDominatorTree *MDT;

    bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
  };

  char HexagonBitSimplify::ID = 0;
  typedef HexagonBitSimplify HBS;


  // The purpose of this class is to provide a common facility to traverse
  // the function top-down or bottom-up via the dominator tree, and keep
  // track of the available registers.
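  // As an illustration (an assumed usage sketch, not part of the original
  // comment): a subclass only needs to override processBlock(); the driver,
  // HexagonBitSimplify::visitBlock(), walks the dominator tree and passes in
  // the set of registers available on entry to each block, e.g.
  //   struct CountDefs : public Transformation {
  //     unsigned Count = 0;
  //     CountDefs() : Transformation(true) {}   // top-down traversal
  //     bool processBlock(MachineBasicBlock &B, const RegisterSet &) override {
  //       for (auto &I : B)
  //         Count += I.getDesc().getNumDefs();
  //       return false;                         // nothing was modified
  //     }
  //   };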
  class Transformation {
  public:
    bool TopDown;
    Transformation(bool TD) : TopDown(TD) {}
    virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
    virtual ~Transformation() {}
  };
}

INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
      "Hexagon bit simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
      "Hexagon bit simplification", false, false)


bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
      RegisterSet &AVs) {
  MachineDomTreeNode *N = MDT->getNode(&B);
  typedef GraphTraits<MachineDomTreeNode*> GTN;
  bool Changed = false;

  if (T.TopDown)
    Changed = T.processBlock(B, AVs);

  RegisterSet Defs;
  for (auto &I : B)
    getInstrDefs(I, Defs);
  RegisterSet NewAVs = AVs;
  NewAVs.insert(Defs);

  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
    MachineBasicBlock *SB = (*I)->getBlock();
    Changed |= visitBlock(*SB, T, NewAVs);
  }
  if (!T.TopDown)
    Changed |= T.processBlock(B, AVs);

  return Changed;
}

//
// Utility functions:
//
void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
      RegisterSet &Defs) {
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isDef())
      continue;
    unsigned R = Op.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(R))
      continue;
    Defs.insert(R);
  }
}

void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
      RegisterSet &Uses) {
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
      continue;
    unsigned R = Op.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(R))
      continue;
    Uses.insert(R);
  }
}

// Check if the W bits starting at position B1 in cell RC1 are equal to the
// W bits starting at position B2 in cell RC2.
bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
      uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
      uint16_t W) {
  for (uint16_t i = 0; i < W; ++i) {
    // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
    if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
      return false;
    // Same for RC2[i].
    if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
      return false;
    if (RC1[B1+i] != RC2[B2+i])
      return false;
  }
  return true;
}


bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W) {
  assert(B < RC.width() && B+W <= RC.width());
  for (uint16_t i = B; i < B+W; ++i)
    if (!RC[i].num())
      return false;
  return true;
}


bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W) {
  assert(B < RC.width() && B+W <= RC.width());
  for (uint16_t i = B; i < B+W; ++i)
    if (!RC[i].is(0))
      return false;
  return true;
}


bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W, uint64_t &U) {
  assert(B < RC.width() && B+W <= RC.width());
  int64_t T = 0;
  for (uint16_t i = B+W; i > B; --i) {
    const BitTracker::BitValue &BV = RC[i-1];
    T <<= 1;
    if (BV.is(1))
      T |= 1;
    else if (!BV.is(0))
      return false;
  }
  U = T;
  return true;
}


bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
      MachineRegisterInfo &MRI) {
  if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
      !TargetRegisterInfo::isVirtualRegister(NewR))
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(End) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    I->setReg(NewR);
  }
  return Begin != End;
}


bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
      unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
      !TargetRegisterInfo::isVirtualRegister(NewR))
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(End) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    I->setReg(NewR);
    I->setSubReg(NewSR);
  }
  return Begin != End;
}


bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
      unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
      !TargetRegisterInfo::isVirtualRegister(NewR))
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(End) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    if (I->getSubReg() != OldSR)
      continue;
    I->setReg(NewR);
    I->setSubReg(NewSR);
  }
  return Begin != End;
}


// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
// of Sub in Reg, and set Width to the size of Sub in bits. Return true
// if this succeeded, otherwise return false.
bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
      unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
  const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
  if (RC == &Hexagon::IntRegsRegClass) {
    assert(RR.Sub == 0);
    Begin = 0;
    Width = 32;
    return true;
  }
  if (RC == &Hexagon::DoubleRegsRegClass) {
    if (RR.Sub == 0) {
      Begin = 0;
      Width = 64;
      return true;
    }
    assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
    Width = 32;
    Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
    return true;
  }
  return false;
}
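
// A worked example of the mapping above (illustrative, derived from the code):
// for a 64-bit virtual register vreg1 in DoubleRegs, getSubregMask on
// vreg1:subreg_hireg yields Begin=32, Width=32, while getSubregMask on vreg1
// with Sub==0 yields Begin=0, Width=64. Any other register class makes the
// function return false.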

// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
// subregister.
bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
      BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
  assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
  unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
  assert(Sub1 != Sub2);
  if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
    SL = I.getOperand(1);
    SH = I.getOperand(3);
    return true;
  }
  if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
    SH = I.getOperand(1);
    SL = I.getOperand(3);
    return true;
  }
  return false;
}


// All stores (except 64-bit stores) take a 32-bit register as the source
// of the value to be stored. If the instruction stores into a location
// that is shorter than 32 bits, some bits of the source register are not
// used. For each store instruction, calculate the set of used bits in
// the source register, and set appropriate bits in Bits. Return true if
// the bits are calculated, false otherwise.
bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
      uint16_t Begin) {
  using namespace Hexagon;

  switch (Opc) {
    // Store byte
    case S2_storerb_io:  // memb(Rs32+#s11:0)=Rt32
    case S2_storerbnew_io:  // memb(Rs32+#s11:0)=Nt8.new
    case S2_pstorerbt_io:  // if (Pv4) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbf_io:  // if (!Pv4) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbtnew_io:  // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbfnew_io:  // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbnewt_io:  // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S2_pstorerbnewf_io:  // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewtnew_io:  // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewfnew_io:  // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S2_storerb_pi:  // memb(Rx32++#s4:0)=Rt32
    case S2_storerbnew_pi:  // memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbt_pi:  // if (Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbf_pi:  // if (!Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbtnew_pi:  // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbfnew_pi:  // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbnewt_pi:  // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewf_pi:  // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewtnew_pi:  // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewfnew_pi:  // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S4_storerb_ap:  // memb(Re32=#U6)=Rt32
    case S4_storerbnew_ap:  // memb(Re32=#U6)=Nt8.new
    case S2_storerb_pr:  // memb(Rx32++Mu2)=Rt32
    case S2_storerbnew_pr:  // memb(Rx32++Mu2)=Nt8.new
    case S4_storerb_ur:  // memb(Ru32<<#u2+#U6)=Rt32
    case S4_storerbnew_ur:  // memb(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerb_pbr:  // memb(Rx32++Mu2:brev)=Rt32
    case S2_storerbnew_pbr:  // memb(Rx32++Mu2:brev)=Nt8.new
    case S2_storerb_pci:  // memb(Rx32++#s4:0:circ(Mu2))=Rt32
    case S2_storerbnew_pci:  // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
    case S2_storerb_pcr:  // memb(Rx32++I:circ(Mu2))=Rt32
    case S2_storerbnew_pcr:  // memb(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerb_rr:  // memb(Rs32+Ru32<<#u2)=Rt32
    case S4_storerbnew_rr:  // memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbt_rr:  // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbf_rr:  // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbtnew_rr:  // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbfnew_rr:  // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbnewt_rr:  // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewf_rr:  // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewtnew_rr:  // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewfnew_rr:  // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerbgp:  // memb(gp+#u16:0)=Rt32
    case S2_storerbnewgp:  // memb(gp+#u16:0)=Nt8.new
    case S4_pstorerbt_abs:  // if (Pv4) memb(#u6)=Rt32
    case S4_pstorerbf_abs:  // if (!Pv4) memb(#u6)=Rt32
    case S4_pstorerbtnew_abs:  // if (Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbfnew_abs:  // if (!Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbnewt_abs:  // if (Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewf_abs:  // if (!Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewtnew_abs:  // if (Pv4.new) memb(#u6)=Nt8.new
    case S4_pstorerbnewfnew_abs:  // if (!Pv4.new) memb(#u6)=Nt8.new
      Bits.set(Begin, Begin+8);
      return true;

    // Store low half
    case S2_storerh_io:  // memh(Rs32+#s11:1)=Rt32
    case S2_storerhnew_io:  // memh(Rs32+#s11:1)=Nt8.new
    case S2_pstorerht_io:  // if (Pv4) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhf_io:  // if (!Pv4) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhtnew_io:  // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhfnew_io:  // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhnewt_io:  // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S2_pstorerhnewf_io:  // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewtnew_io:  // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewfnew_io:  // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S2_storerh_pi:  // memh(Rx32++#s4:1)=Rt32
    case S2_storerhnew_pi:  // memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerht_pi:  // if (Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhf_pi:  // if (!Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhtnew_pi:  // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhfnew_pi:  // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhnewt_pi:  // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewf_pi:  // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewtnew_pi:  // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewfnew_pi:  // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S4_storerh_ap:  // memh(Re32=#U6)=Rt32
    case S4_storerhnew_ap:  // memh(Re32=#U6)=Nt8.new
    case S2_storerh_pr:  // memh(Rx32++Mu2)=Rt32
    case S2_storerhnew_pr:  // memh(Rx32++Mu2)=Nt8.new
    case S4_storerh_ur:  // memh(Ru32<<#u2+#U6)=Rt32
    case S4_storerhnew_ur:  // memh(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerh_pbr:  // memh(Rx32++Mu2:brev)=Rt32
    case S2_storerhnew_pbr:  // memh(Rx32++Mu2:brev)=Nt8.new
    case S2_storerh_pci:  // memh(Rx32++#s4:1:circ(Mu2))=Rt32
    case S2_storerhnew_pci:  // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
    case S2_storerh_pcr:  // memh(Rx32++I:circ(Mu2))=Rt32
    case S2_storerhnew_pcr:  // memh(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerh_rr:  // memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerht_rr:  // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhf_rr:  // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhtnew_rr:  // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhfnew_rr:  // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_storerhnew_rr:  // memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewt_rr:  // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewf_rr:  // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewtnew_rr:  // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewfnew_rr:  // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerhgp:  // memh(gp+#u16:1)=Rt32
    case S2_storerhnewgp:  // memh(gp+#u16:1)=Nt8.new
    case S4_pstorerht_abs:  // if (Pv4) memh(#u6)=Rt32
    case S4_pstorerhf_abs:  // if (!Pv4) memh(#u6)=Rt32
    case S4_pstorerhtnew_abs:  // if (Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhfnew_abs:  // if (!Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhnewt_abs:  // if (Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewf_abs:  // if (!Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewtnew_abs:  // if (Pv4.new) memh(#u6)=Nt8.new
    case S4_pstorerhnewfnew_abs:  // if (!Pv4.new) memh(#u6)=Nt8.new
      Bits.set(Begin, Begin+16);
      return true;

    // Store high half
    case S2_storerf_io:  // memh(Rs32+#s11:1)=Rt.H32
    case S2_pstorerft_io:  // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S2_pstorerff_io:  // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerftnew_io:  // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerffnew_io:  // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S2_storerf_pi:  // memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerft_pi:  // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerff_pi:  // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerftnew_pi:  // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerffnew_pi:  // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S4_storerf_ap:  // memh(Re32=#U6)=Rt.H32
    case S2_storerf_pr:  // memh(Rx32++Mu2)=Rt.H32
    case S4_storerf_ur:  // memh(Ru32<<#u2+#U6)=Rt.H32
    case S2_storerf_pbr:  // memh(Rx32++Mu2:brev)=Rt.H32
    case S2_storerf_pci:  // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
    case S2_storerf_pcr:  // memh(Rx32++I:circ(Mu2))=Rt.H32
    case S4_storerf_rr:  // memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerft_rr:  // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerff_rr:  // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerftnew_rr:  // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerffnew_rr:  // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S2_storerfgp:  // memh(gp+#u16:1)=Rt.H32
    case S4_pstorerft_abs:  // if (Pv4) memh(#u6)=Rt.H32
    case S4_pstorerff_abs:  // if (!Pv4) memh(#u6)=Rt.H32
    case S4_pstorerftnew_abs:  // if (Pv4.new) memh(#u6)=Rt.H32
    case S4_pstorerffnew_abs:  // if (!Pv4.new) memh(#u6)=Rt.H32
      Bits.set(Begin+16, Begin+32);
      return true;
  }

  return false;
}


// For an instruction with opcode Opc, calculate the set of bits that it
// uses in a register in operand OpN. This only calculates the set of used
// bits for cases where it does not depend on any operands (as is the case
// in shifts, for example). For concrete instructions from a program, the
// operand may be a subregister of a larger register, while Bits would
// correspond to the larger register in its entirety. Because of that,
// the parameter Begin can be used to indicate which bit of Bits should be
// considered the LSB of the operand.
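// For example (an illustrative reading of the cases handled below): for
// A2_sxtb the single register source, operand 1, uses bits [Begin, Begin+8),
// while for A2_asrh operand 1 uses bits [Begin+16, Begin+32).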
bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
      BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
  using namespace Hexagon;

  const MCInstrDesc &D = HII.get(Opc);
  if (D.mayStore()) {
    if (OpN == D.getNumOperands()-1)
      return getUsedBitsInStore(Opc, Bits, Begin);
    return false;
  }

  switch (Opc) {
    // One register source. Used bits: R1[0-7].
    case A2_sxtb:
    case A2_zxtb:
    case A4_cmpbeqi:
    case A4_cmpbgti:
    case A4_cmpbgtui:
      if (OpN == 1) {
        Bits.set(Begin, Begin+8);
        return true;
      }
      break;

    // One register source. Used bits: R1[0-15].
    case A2_aslh:
    case A2_sxth:
    case A2_zxth:
    case A4_cmpheqi:
    case A4_cmphgti:
    case A4_cmphgtui:
      if (OpN == 1) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      break;

    // One register source. Used bits: R1[16-31].
    case A2_asrh:
      if (OpN == 1) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      break;

    // Two register sources. Used bits: R1[0-7], R2[0-7].
    case A4_cmpbeq:
    case A4_cmpbgt:
    case A4_cmpbgtu:
      if (OpN == 1) {
        Bits.set(Begin, Begin+8);
        return true;
      }
      break;

    // Two register sources. Used bits: R1[0-15], R2[0-15].
    case A4_cmpheq:
    case A4_cmphgt:
    case A4_cmphgtu:
    case A2_addh_h16_ll:
    case A2_addh_h16_sat_ll:
    case A2_addh_l16_ll:
    case A2_addh_l16_sat_ll:
    case A2_combine_ll:
    case A2_subh_h16_ll:
    case A2_subh_h16_sat_ll:
    case A2_subh_l16_ll:
    case A2_subh_l16_sat_ll:
    case M2_mpy_acc_ll_s0:
    case M2_mpy_acc_ll_s1:
    case M2_mpy_acc_sat_ll_s0:
    case M2_mpy_acc_sat_ll_s1:
    case M2_mpy_ll_s0:
    case M2_mpy_ll_s1:
    case M2_mpy_nac_ll_s0:
    case M2_mpy_nac_ll_s1:
    case M2_mpy_nac_sat_ll_s0:
    case M2_mpy_nac_sat_ll_s1:
    case M2_mpy_rnd_ll_s0:
    case M2_mpy_rnd_ll_s1:
    case M2_mpy_sat_ll_s0:
    case M2_mpy_sat_ll_s1:
    case M2_mpy_sat_rnd_ll_s0:
    case M2_mpy_sat_rnd_ll_s1:
    case M2_mpyd_acc_ll_s0:
    case M2_mpyd_acc_ll_s1:
    case M2_mpyd_ll_s0:
    case M2_mpyd_ll_s1:
    case M2_mpyd_nac_ll_s0:
    case M2_mpyd_nac_ll_s1:
    case M2_mpyd_rnd_ll_s0:
    case M2_mpyd_rnd_ll_s1:
    case M2_mpyu_acc_ll_s0:
    case M2_mpyu_acc_ll_s1:
    case M2_mpyu_ll_s0:
    case M2_mpyu_ll_s1:
    case M2_mpyu_nac_ll_s0:
    case M2_mpyu_nac_ll_s1:
    case M2_mpyud_acc_ll_s0:
    case M2_mpyud_acc_ll_s1:
    case M2_mpyud_ll_s0:
    case M2_mpyud_ll_s1:
    case M2_mpyud_nac_ll_s0:
    case M2_mpyud_nac_ll_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      break;

    // Two register sources. Used bits: R1[0-15], R2[16-31].
    case A2_addh_h16_lh:
    case A2_addh_h16_sat_lh:
    case A2_combine_lh:
    case A2_subh_h16_lh:
    case A2_subh_h16_sat_lh:
    case M2_mpy_acc_lh_s0:
    case M2_mpy_acc_lh_s1:
    case M2_mpy_acc_sat_lh_s0:
    case M2_mpy_acc_sat_lh_s1:
    case M2_mpy_lh_s0:
    case M2_mpy_lh_s1:
    case M2_mpy_nac_lh_s0:
    case M2_mpy_nac_lh_s1:
    case M2_mpy_nac_sat_lh_s0:
    case M2_mpy_nac_sat_lh_s1:
    case M2_mpy_rnd_lh_s0:
    case M2_mpy_rnd_lh_s1:
    case M2_mpy_sat_lh_s0:
    case M2_mpy_sat_lh_s1:
    case M2_mpy_sat_rnd_lh_s0:
    case M2_mpy_sat_rnd_lh_s1:
    case M2_mpyd_acc_lh_s0:
    case M2_mpyd_acc_lh_s1:
    case M2_mpyd_lh_s0:
    case M2_mpyd_lh_s1:
    case M2_mpyd_nac_lh_s0:
    case M2_mpyd_nac_lh_s1:
    case M2_mpyd_rnd_lh_s0:
    case M2_mpyd_rnd_lh_s1:
    case M2_mpyu_acc_lh_s0:
    case M2_mpyu_acc_lh_s1:
    case M2_mpyu_lh_s0:
    case M2_mpyu_lh_s1:
    case M2_mpyu_nac_lh_s0:
    case M2_mpyu_nac_lh_s1:
    case M2_mpyud_acc_lh_s0:
    case M2_mpyud_acc_lh_s1:
    case M2_mpyud_lh_s0:
    case M2_mpyud_lh_s1:
    case M2_mpyud_nac_lh_s0:
    case M2_mpyud_nac_lh_s1:
    // These four are actually LH.
    case A2_addh_l16_hl:
    case A2_addh_l16_sat_hl:
    case A2_subh_l16_hl:
    case A2_subh_l16_sat_hl:
      if (OpN == 1) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      if (OpN == 2) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      break;

    // Two register sources, used bits: R1[16-31], R2[0-15].
    case A2_addh_h16_hl:
    case A2_addh_h16_sat_hl:
    case A2_combine_hl:
    case A2_subh_h16_hl:
    case A2_subh_h16_sat_hl:
    case M2_mpy_acc_hl_s0:
    case M2_mpy_acc_hl_s1:
    case M2_mpy_acc_sat_hl_s0:
    case M2_mpy_acc_sat_hl_s1:
    case M2_mpy_hl_s0:
    case M2_mpy_hl_s1:
    case M2_mpy_nac_hl_s0:
    case M2_mpy_nac_hl_s1:
    case M2_mpy_nac_sat_hl_s0:
    case M2_mpy_nac_sat_hl_s1:
    case M2_mpy_rnd_hl_s0:
    case M2_mpy_rnd_hl_s1:
    case M2_mpy_sat_hl_s0:
    case M2_mpy_sat_hl_s1:
    case M2_mpy_sat_rnd_hl_s0:
    case M2_mpy_sat_rnd_hl_s1:
    case M2_mpyd_acc_hl_s0:
    case M2_mpyd_acc_hl_s1:
    case M2_mpyd_hl_s0:
    case M2_mpyd_hl_s1:
    case M2_mpyd_nac_hl_s0:
    case M2_mpyd_nac_hl_s1:
    case M2_mpyd_rnd_hl_s0:
    case M2_mpyd_rnd_hl_s1:
    case M2_mpyu_acc_hl_s0:
    case M2_mpyu_acc_hl_s1:
    case M2_mpyu_hl_s0:
    case M2_mpyu_hl_s1:
    case M2_mpyu_nac_hl_s0:
    case M2_mpyu_nac_hl_s1:
    case M2_mpyud_acc_hl_s0:
    case M2_mpyud_acc_hl_s1:
    case M2_mpyud_hl_s0:
    case M2_mpyud_hl_s1:
    case M2_mpyud_nac_hl_s0:
    case M2_mpyud_nac_hl_s1:
      if (OpN == 1) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      if (OpN == 2) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      break;

    // Two register sources, used bits: R1[16-31], R2[16-31].
    case A2_addh_h16_hh:
    case A2_addh_h16_sat_hh:
    case A2_combine_hh:
    case A2_subh_h16_hh:
    case A2_subh_h16_sat_hh:
    case M2_mpy_acc_hh_s0:
    case M2_mpy_acc_hh_s1:
    case M2_mpy_acc_sat_hh_s0:
    case M2_mpy_acc_sat_hh_s1:
    case M2_mpy_hh_s0:
    case M2_mpy_hh_s1:
    case M2_mpy_nac_hh_s0:
    case M2_mpy_nac_hh_s1:
    case M2_mpy_nac_sat_hh_s0:
    case M2_mpy_nac_sat_hh_s1:
    case M2_mpy_rnd_hh_s0:
    case M2_mpy_rnd_hh_s1:
    case M2_mpy_sat_hh_s0:
    case M2_mpy_sat_hh_s1:
    case M2_mpy_sat_rnd_hh_s0:
    case M2_mpy_sat_rnd_hh_s1:
    case M2_mpyd_acc_hh_s0:
    case M2_mpyd_acc_hh_s1:
    case M2_mpyd_hh_s0:
    case M2_mpyd_hh_s1:
    case M2_mpyd_nac_hh_s0:
    case M2_mpyd_nac_hh_s1:
    case M2_mpyd_rnd_hh_s0:
    case M2_mpyd_rnd_hh_s1:
    case M2_mpyu_acc_hh_s0:
    case M2_mpyu_acc_hh_s1:
    case M2_mpyu_hh_s0:
    case M2_mpyu_hh_s1:
    case M2_mpyu_nac_hh_s0:
    case M2_mpyu_nac_hh_s1:
    case M2_mpyud_acc_hh_s0:
    case M2_mpyud_acc_hh_s1:
    case M2_mpyud_hh_s0:
    case M2_mpyud_hh_s1:
    case M2_mpyud_nac_hh_s0:
    case M2_mpyud_nac_hh_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      break;
  }

  return false;
}


// Calculate the register class that matches Reg:Sub. For example, if
// vreg1 is a double register, then vreg1:subreg_hireg would match the "int"
// register class.
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
      const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
  if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
    return nullptr;
  auto *RC = MRI.getRegClass(RR.Reg);
  if (RR.Sub == 0)
    return RC;

  auto VerifySR = [] (unsigned Sub) -> void {
    assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
  };

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
      VerifySR(RR.Sub);
      return &Hexagon::IntRegsRegClass;
    case Hexagon::VecDblRegsRegClassID:
      VerifySR(RR.Sub);
      return &Hexagon::VectorRegsRegClass;
    case Hexagon::VecDblRegs128BRegClassID:
      VerifySR(RR.Sub);
      return &Hexagon::VectorRegs128BRegClass;
  }
  return nullptr;
}


// Check if RD could be replaced with RS at any possible use of RD.
// For example, a predicate register cannot be replaced with an integer
// register, but a 64-bit register with a subregister can be replaced
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
  if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
      !TargetRegisterInfo::isVirtualRegister(RS.Reg))
    return false;
  // Return false if one (or both) classes are nullptr.
  auto *DRC = getFinalVRegClass(RD, MRI);
  if (!DRC)
    return false;

  return DRC == getFinalVRegClass(RS, MRI);
}


//
// Dead code elimination
//
namespace {
  class DeadCodeElimination {
  public:
    DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
      : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
        MDT(mdt), MRI(mf.getRegInfo()) {}

    bool run() {
      return runOnNode(MDT.getRootNode());
    }

  private:
    bool isDead(unsigned R) const;
    bool runOnNode(MachineDomTreeNode *N);

    MachineFunction &MF;
    const HexagonInstrInfo &HII;
    MachineDominatorTree &MDT;
    MachineRegisterInfo &MRI;
  };
}


bool DeadCodeElimination::isDead(unsigned R) const {
  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
    MachineInstr *UseI = I->getParent();
    if (UseI->isDebugValue())
      continue;
    if (UseI->isPHI()) {
      assert(!UseI->getOperand(0).getSubReg());
      unsigned DR = UseI->getOperand(0).getReg();
      if (DR == R)
        continue;
    }
    return false;
  }
  return true;
}


bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
  bool Changed = false;
  typedef GraphTraits<MachineDomTreeNode*> GTN;
  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
    Changed |= runOnNode(*I);

  MachineBasicBlock *B = N->getBlock();
  std::vector<MachineInstr*> Instrs;
  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
    Instrs.push_back(&*I);

  for (auto MI : Instrs) {
    unsigned Opc = MI->getOpcode();
    // Do not touch lifetime markers. This is why the target-independent DCE
    // cannot be used.
    if (Opc == TargetOpcode::LIFETIME_START ||
        Opc == TargetOpcode::LIFETIME_END)
      continue;
    bool Store = false;
    if (MI->isInlineAsm())
      continue;
    // Delete PHIs if possible.
    if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
      continue;

    bool AllDead = true;
    SmallVector<unsigned,2> Regs;
    for (auto &Op : MI->operands()) {
      if (!Op.isReg() || !Op.isDef())
        continue;
      unsigned R = Op.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
        AllDead = false;
        break;
      }
      Regs.push_back(R);
    }
    if (!AllDead)
      continue;

    B->erase(MI);
    for (unsigned i = 0, n = Regs.size(); i != n; ++i)
      MRI.markUsesInDebugValueAsUndef(Regs[i]);
    Changed = true;
  }

  return Changed;
}


//
// Eliminate redundant instructions
//
// This transformation will identify instructions where the output register
// is the same as one of its input registers. This only works on instructions
// that define a single register (unlike post-increment loads, for example).
// The equality check is actually more detailed: the code calculates which
// bits of the output are used, and only compares these bits with the input
// registers.
// If the output matches an input, the instruction is replaced with a COPY.
// The copies will be removed by another transformation.
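//
// A typical case (illustrative, register names are made up): if the program
// contains
//   vreg3 = A2_zxth vreg2
// and every user of vreg3 only reads bits [0,16), then the used bits of the
// output are identical to the corresponding bits of vreg2, and the zero-extend
// can be rewritten as
//   vreg3 = COPY vreg2
// which copy propagation and dead code elimination can then clean up.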
namespace {
  class RedundantInstrElimination : public Transformation {
  public:
    RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
          MachineRegisterInfo &mri)
        : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
  private:
    bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
        unsigned &LostB, unsigned &LostE);
    bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
        unsigned &LostB, unsigned &LostE);
    bool computeUsedBits(unsigned Reg, BitVector &Bits);
    bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
        uint16_t Begin);
    bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };
}


// Check if the instruction is a lossy shift left, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;
  unsigned Opc = MI.getOpcode();
  unsigned ImN, RegN, Width;
  switch (Opc) {
    case S2_asl_i_p:
      ImN = 2;
      RegN = 1;
      Width = 64;
      break;
    case S2_asl_i_p_acc:
    case S2_asl_i_p_and:
    case S2_asl_i_p_nac:
    case S2_asl_i_p_or:
    case S2_asl_i_p_xacc:
      ImN = 3;
      RegN = 2;
      Width = 64;
      break;
    case S2_asl_i_r:
      ImN = 2;
      RegN = 1;
      Width = 32;
      break;
    case S2_addasl_rrri:
    case S4_andi_asl_ri:
    case S4_ori_asl_ri:
    case S4_addi_asl_ri:
    case S4_subi_asl_ri:
    case S2_asl_i_r_acc:
    case S2_asl_i_r_and:
    case S2_asl_i_r_nac:
    case S2_asl_i_r_or:
    case S2_asl_i_r_sat:
    case S2_asl_i_r_xacc:
      ImN = 3;
      RegN = 2;
      Width = 32;
      break;
    default:
      return false;
  }

  if (RegN != OpN)
    return false;

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();
  if (S == 0)
    return false;
  LostB = Width-S;
  LostE = Width;
  return true;
}


// Check if the instruction is a lossy shift right, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
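// For instance (derived from the code below): for "Rd = S2_lsr_i_r Rs, #5"
// with OpN referring to Rs, the shifted-out range is [LostB, LostE) = [0, 5).
// A shift amount of zero still reports [0, 0), i.e. nothing is lost.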
bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;
  unsigned Opc = MI.getOpcode();
  unsigned ImN, RegN;
  switch (Opc) {
    case S2_asr_i_p:
    case S2_lsr_i_p:
      ImN = 2;
      RegN = 1;
      break;
    case S2_asr_i_p_acc:
    case S2_asr_i_p_and:
    case S2_asr_i_p_nac:
    case S2_asr_i_p_or:
    case S2_lsr_i_p_acc:
    case S2_lsr_i_p_and:
    case S2_lsr_i_p_nac:
    case S2_lsr_i_p_or:
    case S2_lsr_i_p_xacc:
      ImN = 3;
      RegN = 2;
      break;
    case S2_asr_i_r:
    case S2_lsr_i_r:
      ImN = 2;
      RegN = 1;
      break;
    case S4_andi_lsr_ri:
    case S4_ori_lsr_ri:
    case S4_addi_lsr_ri:
    case S4_subi_lsr_ri:
    case S2_asr_i_r_acc:
    case S2_asr_i_r_and:
    case S2_asr_i_r_nac:
    case S2_asr_i_r_or:
    case S2_lsr_i_r_acc:
    case S2_lsr_i_r_and:
    case S2_lsr_i_r_nac:
    case S2_lsr_i_r_or:
    case S2_lsr_i_r_xacc:
      ImN = 3;
      RegN = 2;
      break;

    default:
      return false;
  }

  if (RegN != OpN)
    return false;

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();
  LostB = 0;
  LostE = S;
  return true;
}


// Calculate the bit vector that corresponds to the used bits of register Reg.
// The vector Bits has the same size as Reg in bits. If the calculation
// fails (i.e. the used bits are unknown), it returns false. Otherwise, it
// returns true and sets the corresponding bits in Bits.
bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
  BitVector Used(Bits.size());
  RegisterSet Visited;
  std::vector<unsigned> Pending;
  Pending.push_back(Reg);

  for (unsigned i = 0; i < Pending.size(); ++i) {
    unsigned R = Pending[i];
    if (Visited.has(R))
      continue;
    Visited.insert(R);
    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
      BitTracker::RegisterRef UR = *I;
      unsigned B, W;
      if (!HBS::getSubregMask(UR, B, W, MRI))
        return false;
      MachineInstr &UseI = *I->getParent();
      if (UseI.isPHI() || UseI.isCopy()) {
        unsigned DefR = UseI.getOperand(0).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(DefR))
          return false;
        Pending.push_back(DefR);
      } else {
        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
          return false;
      }
    }
  }
  Bits |= Used;
  return true;
}


// Calculate the bits used by instruction MI in a register in operand OpN.
// Return true/false if the calculation succeeds/fails. If it succeeds, set
// the used bits in Bits. This function does not reset any bits in Bits, so
// subsequent calls over different instructions will result in the union
// of the used bits in all these instructions.
// The register in question may be used with a subregister, whereas Bits
// holds the bits for the entire register. To keep track of that, the
// argument Begin indicates where in Bits is the least significant bit
// of the register used in operand OpN. For example, in the instruction:
//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
// operand 1 is a 32-bit register, which happens to be a subregister
// of the 64-bit register vreg2, and that subregister starts at position 32.
// In this case Begin=32, since Bits[32] would be the least significant bit
// of vreg2:subreg_hireg.
bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
      unsigned OpN, BitVector &Bits, uint16_t Begin) {
  unsigned Opc = MI.getOpcode();
  BitVector T(Bits.size());
  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
  // Even if we don't have bits yet, we could still provide some information
  // if the instruction is a lossy shift: the lost bits will be marked as
  // not used.
  unsigned LB, LE;
  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
    assert(MI.getOperand(OpN).isReg());
    BitTracker::RegisterRef RR = MI.getOperand(OpN);
    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
    uint16_t Width = RC->getSize()*8;

    if (!GotBits)
      T.set(Begin, Begin+Width);
    assert(LB <= LE && LB < Width && LE <= Width);
    T.reset(Begin+LB, Begin+LE);
    GotBits = true;
  }
  if (GotBits)
    Bits |= T;
  return GotBits;
}


// Calculates the used bits in RD ("defined register"), and checks if these
// bits in RS ("used register") and RD are identical.
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
      BitTracker::RegisterRef RS) {
  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);

  unsigned DB, DW;
  if (!HBS::getSubregMask(RD, DB, DW, MRI))
    return false;
  unsigned SB, SW;
  if (!HBS::getSubregMask(RS, SB, SW, MRI))
    return false;
  if (SW != DW)
    return false;

  BitVector Used(DC.width());
  if (!computeUsedBits(RD.Reg, Used))
    return false;

  for (unsigned i = 0; i != DW; ++i)
    if (Used[i+DB] && DC[DB+i] != SC[SB+i])
      return false;
  return true;
}


bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
      const RegisterSet&) {
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
    NextI = std::next(I);
    MachineInstr *MI = &*I;

    if (MI->getOpcode() == TargetOpcode::COPY)
      continue;
    if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
      continue;
    unsigned NumD = MI->getDesc().getNumDefs();
    if (NumD != 1)
      continue;

    BitTracker::RegisterRef RD = MI->getOperand(0);
    if (!BT.has(RD.Reg))
      continue;
    const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);

    // Find a source operand that is equal to the result.
    for (auto &Op : MI->uses()) {
      if (!Op.isReg())
        continue;
      BitTracker::RegisterRef RS = Op;
      if (!BT.has(RS.Reg))
        continue;
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        continue;

      unsigned BN, BW;
      if (!HBS::getSubregMask(RS, BN, BW, MRI))
        continue;

      const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
      if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
        continue;

      // If found, replace the instruction with a COPY.
      DebugLoc DL = MI->getDebugLoc();
      const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
      unsigned NewR = MRI.createVirtualRegister(FRC);
      BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
          .addReg(RS.Reg, 0, RS.Sub);
      HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
      BT.put(BitTracker::RegisterRef(NewR), SC);
      Changed = true;
      break;
    }
  }

  return Changed;
}


//
// Const generation
//
// Recognize instructions that produce constant values known at compile-time.
// Replace them with register definitions that load these constants directly.
namespace {
  class ConstGeneration : public Transformation {
  public:
    ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
          MachineRegisterInfo &mri)
        : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
  private:
    bool isTfrConst(const MachineInstr *MI) const;
    bool isConst(unsigned R, int64_t &V) const;
    unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
        MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };
}

bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
  if (!BT.has(R))
    return false;
  const BitTracker::RegisterCell &RC = BT.lookup(R);
  int64_t T = 0;
  for (unsigned i = RC.width(); i > 0; --i) {
    const BitTracker::BitValue &V = RC[i-1];
    T <<= 1;
    if (V.is(1))
      T |= 1;
    else if (!V.is(0))
      return false;
  }
  C = T;
  return true;
}


bool ConstGeneration::isTfrConst(const MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_tfrsi:
    case Hexagon::A2_tfrpi:
    case Hexagon::TFR_PdTrue:
    case Hexagon::TFR_PdFalse:
    case Hexagon::CONST32_Int_Real:
    case Hexagon::CONST64_Int_Real:
      return true;
  }
  return false;
}


// Generate a transfer-immediate instruction that is appropriate for the
// register class and the actual value being transferred.
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
      MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
  unsigned Reg = MRI.createVirtualRegister(RC);
  if (RC == &Hexagon::IntRegsRegClass) {
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
        .addImm(int32_t(C));
    return Reg;
  }

  if (RC == &Hexagon::DoubleRegsRegClass) {
    if (isInt<8>(C)) {
      BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
          .addImm(C);
      return Reg;
    }

    unsigned Lo = Lo_32(C), Hi = Hi_32(C);
    if (isInt<8>(Lo) || isInt<8>(Hi)) {
      unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
                                  : Hexagon::A4_combineii;
      BuildMI(B, At, DL, HII.get(Opc), Reg)
          .addImm(int32_t(Hi))
          .addImm(int32_t(Lo));
      return Reg;
    }

    BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
        .addImm(C);
    return Reg;
  }

  if (RC == &Hexagon::PredRegsRegClass) {
    unsigned Opc;
    if (C == 0)
      Opc = Hexagon::TFR_PdFalse;
    else if ((C & 0xFF) == 0xFF)
      Opc = Hexagon::TFR_PdTrue;
    else
      return 0;
    BuildMI(B, At, DL, HII.get(Opc), Reg);
    return Reg;
  }

  return 0;
}


bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  bool Changed = false;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
    if (isTfrConst(I))
      continue;
    Defs.clear();
    HBS::getInstrDefs(*I, Defs);
    if (Defs.count() != 1)
      continue;
    unsigned DR = Defs.find_first();
    if (!TargetRegisterInfo::isVirtualRegister(DR))
      continue;
    int64_t C;
    if (isConst(DR, C)) {
      DebugLoc DL = I->getDebugLoc();
      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
      unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
      if (ImmReg) {
        HBS::replaceReg(DR, ImmReg, MRI);
        BT.put(ImmReg, BT.lookup(DR));
        Changed = true;
      }
    }
  }
  return Changed;
}


//
// Copy generation
//
// Identify pairs of available registers which hold identical values.
// In such cases, only one of them needs to be calculated, the other one
// will be defined as a copy of the first.
//
// Copy propagation
//
// Eliminate register copies RD = RS by replacing the uses of RD with
// uses of RS.
namespace {
  class CopyGeneration : public Transformation {
  public:
    CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
          MachineRegisterInfo &mri)
        : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
  private:
    bool findMatch(const BitTracker::RegisterRef &Inp,
        BitTracker::RegisterRef &Out, const RegisterSet &AVs);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };

  class CopyPropagation : public Transformation {
  public:
    CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
        : Transformation(false), MRI(mri) {}
    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
    static bool isCopyReg(unsigned Opc);
  private:
    bool propagateRegCopy(MachineInstr &MI);

    MachineRegisterInfo &MRI;
  };

}


/// Check if there is a register in AVs that is identical to Inp. If so,
/// set Out to the found register. The output may be a pair Reg:Sub.
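/// For example (illustrative): if AVs contains a 64-bit register vreg7 whose
/// low 32 bits are known to be equal to the 32-bit input Inp, findMatch
/// returns true with Out = vreg7:subreg_loreg.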
bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
      BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
  if (!BT.has(Inp.Reg))
    return false;
  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
  unsigned B, W;
  if (!HBS::getSubregMask(Inp, B, W, MRI))
    return false;

  for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
    if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
      continue;
    const BitTracker::RegisterCell &RC = BT.lookup(R);
    unsigned RW = RC.width();
    if (W == RW) {
      if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
        continue;
      if (!HBS::isEqual(InpRC, B, RC, 0, W))
        continue;
      Out.Reg = R;
      Out.Sub = 0;
      return true;
    }
    // Check if there is a super-register, whose part (with a subregister)
    // is equal to the input.
    // Only do double registers for now.
    if (W*2 != RW)
      continue;
    if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
      continue;

    if (HBS::isEqual(InpRC, B, RC, 0, W))
      Out.Sub = Hexagon::subreg_loreg;
    else if (HBS::isEqual(InpRC, B, RC, W, W))
      Out.Sub = Hexagon::subreg_hireg;
    else
      continue;
    Out.Reg = R;
    return true;
  }
  return false;
}


bool CopyGeneration::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  RegisterSet AVB(AVs);
  bool Changed = false;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
       ++I, AVB.insert(Defs)) {
    NextI = std::next(I);
    Defs.clear();
    HBS::getInstrDefs(*I, Defs);

    unsigned Opc = I->getOpcode();
    if (CopyPropagation::isCopyReg(Opc))
      continue;

    for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
      BitTracker::RegisterRef MR;
      if (!findMatch(R, MR, AVB))
        continue;
      DebugLoc DL = I->getDebugLoc();
      auto *FRC = HBS::getFinalVRegClass(MR, MRI);
      unsigned NewR = MRI.createVirtualRegister(FRC);
      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
      BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
          .addReg(MR.Reg, 0, MR.Sub);
      BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
      Changed = true;
    }
  }

  return Changed;
}


bool CopyPropagation::isCopyReg(unsigned Opc) {
  switch (Opc) {
    case TargetOpcode::COPY:
    case TargetOpcode::REG_SEQUENCE:
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp:
    case Hexagon::A2_combinew:
    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri:
      return true;
    default:
      break;
  }
  return false;
}


bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
  bool Changed = false;
  unsigned Opc = MI.getOpcode();
  BitTracker::RegisterRef RD = MI.getOperand(0);
  assert(MI.getOperand(0).getSubReg() == 0);

  switch (Opc) {
    case TargetOpcode::COPY:
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp: {
      BitTracker::RegisterRef RS = MI.getOperand(1);
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        break;
      if (RS.Sub != 0)
        Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
      else
        Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
      break;
    }
    case TargetOpcode::REG_SEQUENCE: {
      BitTracker::RegisterRef SL, SH;
      if (HBS::parseRegSequence(MI, SL, SH)) {
        Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
                                         SL.Reg, SL.Sub, MRI);
        Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
                                          SH.Reg, SH.Sub, MRI);
      }
      break;
    }
    case Hexagon::A2_combinew: {
      BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
      Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
                                       RL.Reg, RL.Sub, MRI);
      Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
                                        RH.Reg, RH.Sub, MRI);
      break;
    }
    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri: {
      unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
      unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
                                                    : Hexagon::subreg_hireg;
      BitTracker::RegisterRef RS = MI.getOperand(SrcX);
      Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
      break;
    }
  }
  return Changed;
}


bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  std::vector<MachineInstr*> Instrs;
  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
    Instrs.push_back(&*I);

  bool Changed = false;
  for (auto I : Instrs) {
    unsigned Opc = I->getOpcode();
    if (!CopyPropagation::isCopyReg(Opc))
      continue;
    Changed |= propagateRegCopy(*I);
  }

  return Changed;
}


//
// Bit simplification
//
// Recognize patterns that can be simplified and replace them with the
// simpler forms.
// This is by no means complete.
namespace {
  class BitSimplification : public Transformation {
  public:
    BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
          MachineRegisterInfo &mri)
        : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
  private:
    struct RegHalf : public BitTracker::RegisterRef {
      bool Low;  // Low/High halfword.
    };

    bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
        unsigned B, RegHalf &RH);

    bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
        BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
    unsigned getCombineOpcode(bool HLow, bool LLow);

    bool genStoreUpperHalf(MachineInstr *MI);
    bool genStoreImmediate(MachineInstr *MI);
    bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
        const BitTracker::RegisterCell &RC);
    bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
        const BitTracker::RegisterCell &RC);
    bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
        const BitTracker::RegisterCell &RC);
    bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
        const BitTracker::RegisterCell &RC);
    bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
        const BitTracker::RegisterCell &RC);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };
}


// Check if the bits [B..B+16) in register cell RC form a valid halfword,
// i.e. [0..16), [16..32), etc. of some register. If so, return true and
// set the information about the found register in RH.
bool BitSimplification::matchHalf(unsigned SelfR,
      const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
  // XXX This could be searching in the set of available registers, in case
  // the match is not exact.

  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
  // register and all the bits B..B+15 match between RC and the register.
  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
  bool Low = false;
  unsigned I = B;
  while (I < B+16 && RC[I].num())
    I++;
  if (I == B+16)
    return false;

  unsigned Reg = RC[I].RefI.Reg;
  unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
  if (P < I-B)
    return false;
  unsigned Pos = P - (I-B);

  if (Reg == 0 || Reg == SelfR)   // Don't match "self".
    return false;
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return false;
  if (!BT.has(Reg))
    return false;

  const BitTracker::RegisterCell &SC = BT.lookup(Reg);
  if (Pos+16 > SC.width())
    return false;

  for (unsigned i = 0; i < 16; ++i) {
    const BitTracker::BitValue &RV = RC[i+B];
    if (RV.Type == BitTracker::BitValue::Ref) {
      if (RV.RefI.Reg != Reg)
        return false;
      if (RV.RefI.Pos != i+Pos)
        return false;
      continue;
    }
    if (RC[i+B] != SC[i+Pos])
      return false;
  }

  unsigned Sub = 0;
  switch (Pos) {
    case 0:
      Sub = Hexagon::subreg_loreg;
      Low = true;
      break;
    case 16:
      Sub = Hexagon::subreg_loreg;
      Low = false;
      break;
    case 32:
      Sub = Hexagon::subreg_hireg;
      Low = true;
      break;
    case 48:
      Sub = Hexagon::subreg_hireg;
      Low = false;
      break;
    default:
      return false;
  }

  RH.Reg = Reg;
  RH.Sub = Sub;
  RH.Low = Low;
  // If the subregister is not valid with the register, set it to 0.
  if (!HBS::getFinalVRegClass(RH, MRI))
    RH.Sub = 0;

  return true;
}


// Check if RC matches the pattern of a S2_packhl. If so, return true and
// set the inputs Rs and Rt.
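// In other words (an informal restatement of the checks below): RC must have
// the shape { Rs.H in bits [48,64), Rt.H in [32,48), Rs.L in [16,32),
// Rt.L in [0,16) }, which is the value that "S2_packhl Rs, Rt" produces.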
1795 bool BitSimplification::matchPackhl(unsigned SelfR, 1796 const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, 1797 BitTracker::RegisterRef &Rt) { 1798 RegHalf L1, H1, L2, H2; 1799 1800 if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1)) 1801 return false; 1802 if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1)) 1803 return false; 1804 1805 // Rs = H1.L1, Rt = H2.L2 1806 if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) 1807 return false; 1808 if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) 1809 return false; 1810 1811 Rs = H1; 1812 Rt = H2; 1813 return true; 1814 } 1815 1816 1817 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { 1818 return HLow ? LLow ? Hexagon::A2_combine_ll 1819 : Hexagon::A2_combine_lh 1820 : LLow ? Hexagon::A2_combine_hl 1821 : Hexagon::A2_combine_hh; 1822 } 1823 1824 1825 // If MI stores the upper halfword of a register (potentially obtained via 1826 // shifts or extracts), replace it with a storerf instruction. This could 1827 // cause the "extraction" code to become dead. 1828 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { 1829 unsigned Opc = MI->getOpcode(); 1830 if (Opc != Hexagon::S2_storerh_io) 1831 return false; 1832 1833 MachineOperand &ValOp = MI->getOperand(2); 1834 BitTracker::RegisterRef RS = ValOp; 1835 if (!BT.has(RS.Reg)) 1836 return false; 1837 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1838 RegHalf H; 1839 if (!matchHalf(0, RC, 0, H)) 1840 return false; 1841 if (H.Low) 1842 return false; 1843 MI->setDesc(HII.get(Hexagon::S2_storerf_io)); 1844 ValOp.setReg(H.Reg); 1845 ValOp.setSubReg(H.Sub); 1846 return true; 1847 } 1848 1849 1850 // If MI stores a value known at compile-time, and the value is within a range 1851 // that avoids using constant-extenders, replace it with a store-immediate. 1852 bool BitSimplification::genStoreImmediate(MachineInstr *MI) { 1853 unsigned Opc = MI->getOpcode(); 1854 unsigned Align = 0; 1855 switch (Opc) { 1856 case Hexagon::S2_storeri_io: 1857 Align++; 1858 case Hexagon::S2_storerh_io: 1859 Align++; 1860 case Hexagon::S2_storerb_io: 1861 break; 1862 default: 1863 return false; 1864 } 1865 1866 // Avoid stores to frame-indices (due to an unknown offset). 1867 if (!MI->getOperand(0).isReg()) 1868 return false; 1869 MachineOperand &OffOp = MI->getOperand(1); 1870 if (!OffOp.isImm()) 1871 return false; 1872 1873 int64_t Off = OffOp.getImm(); 1874 // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). 1875 if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1))) 1876 return false; 1877 // Source register: 1878 BitTracker::RegisterRef RS = MI->getOperand(2); 1879 if (!BT.has(RS.Reg)) 1880 return false; 1881 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); 1882 uint64_t U; 1883 if (!HBS::getConst(RC, 0, RC.width(), U)) 1884 return false; 1885 1886 // Only consider 8-bit values to avoid constant-extenders. 
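  // (Sketch of the check below: the tracked constant U is truncated to the
  // access width of the original store, and is then required to fit in a
  // signed 8-bit immediate so that, per the comment above, the resulting
  // store-immediate should not need a constant-extender.)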
  int V;
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      V = int8_t(U);
      break;
    case Hexagon::S2_storerh_io:
      V = int16_t(U);
      break;
    case Hexagon::S2_storeri_io:
      V = int32_t(U);
      break;
  }
  if (!isInt<8>(V))
    return false;

  MI->RemoveOperand(2);
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
      break;
    case Hexagon::S2_storerh_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
      break;
    case Hexagon::S2_storeri_io:
      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
      break;
  }
  MI->addOperand(MachineOperand::CreateImm(V));
  return true;
}


// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
// dead.
bool BitSimplification::genPackhl(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc == Hexagon::S2_packhl)
    return false;
  BitTracker::RegisterRef Rs, Rt;
  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  DebugLoc DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
      .addReg(Rs.Reg, 0, Rs.Sub)
      .addReg(Rt.Reg, 0, Rt.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}


// If MI produces a halfword of the input in the low half of the output,
// replace it with zero-extend or extractu.
bool BitSimplification::genExtractHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L;
  // Check for halfword in low 16 bits, zeros elsewhere.
  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
    return false;

  unsigned Opc = MI->getOpcode();
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Prefer zxth, since zxth can go in any slot, while extractu can go only
  // in slots 2 and 3.
  unsigned NewR = 0;
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  if (L.Low && Opc != Hexagon::A2_zxth) {
    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
        .addReg(L.Reg, 0, L.Sub);
  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    // Insert at At (not at MI) so that nothing is placed ahead of a PHI
    // when MI happens to be a PHI node.
    BuildMI(B, At, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
        .addReg(L.Reg, 0, L.Sub)
        .addImm(16);
  }
  if (NewR == 0)
    return false;
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}


// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// combine.
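// Illustrative example (the vreg names are made up): if the cell of vreg3 is
//   RC[16..31] = vreg1.H   RC[0..15] = vreg2.L
// then the code below would rebuild it as
//   vreg3' = A2_combine_hl vreg1, vreg2
// with the first operand supplying the high halfword of the result.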
1984 bool BitSimplification::genCombineHalf(MachineInstr *MI, 1985 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 1986 RegHalf L, H; 1987 // Check for combine h/l 1988 if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H)) 1989 return false; 1990 // Do nothing if this is just a reg copy. 1991 if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low) 1992 return false; 1993 1994 unsigned Opc = MI->getOpcode(); 1995 unsigned COpc = getCombineOpcode(H.Low, L.Low); 1996 if (COpc == Opc) 1997 return false; 1998 1999 MachineBasicBlock &B = *MI->getParent(); 2000 DebugLoc DL = MI->getDebugLoc(); 2001 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2002 auto At = MI->isPHI() ? B.getFirstNonPHI() 2003 : MachineBasicBlock::iterator(MI); 2004 BuildMI(B, At, DL, HII.get(COpc), NewR) 2005 .addReg(H.Reg, 0, H.Sub) 2006 .addReg(L.Reg, 0, L.Sub); 2007 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2008 BT.put(BitTracker::RegisterRef(NewR), RC); 2009 return true; 2010 } 2011 2012 2013 // If MI resets high bits of a register and keeps the lower ones, replace it 2014 // with zero-extend byte/half, and-immediate, or extractu, as appropriate. 2015 bool BitSimplification::genExtractLow(MachineInstr *MI, 2016 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2017 unsigned Opc = MI->getOpcode(); 2018 switch (Opc) { 2019 case Hexagon::A2_zxtb: 2020 case Hexagon::A2_zxth: 2021 case Hexagon::S2_extractu: 2022 return false; 2023 } 2024 if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { 2025 int32_t Imm = MI->getOperand(2).getImm(); 2026 if (isInt<10>(Imm)) 2027 return false; 2028 } 2029 2030 if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) 2031 return false; 2032 unsigned W = RC.width(); 2033 while (W > 0 && RC[W-1].is(0)) 2034 W--; 2035 if (W == 0 || W == RC.width()) 2036 return false; 2037 unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb 2038 : (W == 16) ? Hexagon::A2_zxth 2039 : (W < 10) ? Hexagon::A2_andir 2040 : Hexagon::S2_extractu; 2041 MachineBasicBlock &B = *MI->getParent(); 2042 DebugLoc DL = MI->getDebugLoc(); 2043 2044 for (auto &Op : MI->uses()) { 2045 if (!Op.isReg()) 2046 continue; 2047 BitTracker::RegisterRef RS = Op; 2048 if (!BT.has(RS.Reg)) 2049 continue; 2050 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 2051 unsigned BN, BW; 2052 if (!HBS::getSubregMask(RS, BN, BW, MRI)) 2053 continue; 2054 if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) 2055 continue; 2056 2057 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 2058 auto At = MI->isPHI() ? B.getFirstNonPHI() 2059 : MachineBasicBlock::iterator(MI); 2060 auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) 2061 .addReg(RS.Reg, 0, RS.Sub); 2062 if (NewOpc == Hexagon::A2_andir) 2063 MIB.addImm((1 << W) - 1); 2064 else if (NewOpc == Hexagon::S2_extractu) 2065 MIB.addImm(W).addImm(0); 2066 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); 2067 BT.put(BitTracker::RegisterRef(NewR), RC); 2068 return true; 2069 } 2070 return false; 2071 } 2072 2073 2074 // Check for tstbit simplification opportunity, where the bit being checked 2075 // can be tracked back to another register. 
For example: 2076 // vreg2 = S2_lsr_i_r vreg1, 5 2077 // vreg3 = S2_tstbit_i vreg2, 0 2078 // => 2079 // vreg3 = S2_tstbit_i vreg1, 5 2080 bool BitSimplification::simplifyTstbit(MachineInstr *MI, 2081 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { 2082 unsigned Opc = MI->getOpcode(); 2083 if (Opc != Hexagon::S2_tstbit_i) 2084 return false; 2085 2086 unsigned BN = MI->getOperand(2).getImm(); 2087 BitTracker::RegisterRef RS = MI->getOperand(1); 2088 unsigned F, W; 2089 DebugLoc DL = MI->getDebugLoc(); 2090 if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) 2091 return false; 2092 MachineBasicBlock &B = *MI->getParent(); 2093 auto At = MI->isPHI() ? B.getFirstNonPHI() 2094 : MachineBasicBlock::iterator(MI); 2095 2096 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); 2097 const BitTracker::BitValue &V = SC[F+BN]; 2098 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { 2099 const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); 2100 // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is 2101 // a double register, need to use a subregister and adjust bit 2102 // number. 2103 unsigned P = UINT_MAX; 2104 BitTracker::RegisterRef RR(V.RefI.Reg, 0); 2105 if (TC == &Hexagon::DoubleRegsRegClass) { 2106 P = V.RefI.Pos; 2107 RR.Sub = Hexagon::subreg_loreg; 2108 if (P >= 32) { 2109 P -= 32; 2110 RR.Sub = Hexagon::subreg_hireg; 2111 } 2112 } else if (TC == &Hexagon::IntRegsRegClass) { 2113 P = V.RefI.Pos; 2114 } 2115 if (P != UINT_MAX) { 2116 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); 2117 BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) 2118 .addReg(RR.Reg, 0, RR.Sub) 2119 .addImm(P); 2120 HBS::replaceReg(RD.Reg, NewR, MRI); 2121 BT.put(NewR, RC); 2122 return true; 2123 } 2124 } else if (V.is(0) || V.is(1)) { 2125 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); 2126 unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; 2127 BuildMI(B, At, DL, HII.get(NewOpc), NewR); 2128 HBS::replaceReg(RD.Reg, NewR, MRI); 2129 return true; 2130 } 2131 2132 return false; 2133 } 2134 2135 2136 bool BitSimplification::processBlock(MachineBasicBlock &B, 2137 const RegisterSet &AVs) { 2138 bool Changed = false; 2139 RegisterSet AVB = AVs; 2140 RegisterSet Defs; 2141 2142 for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) { 2143 MachineInstr *MI = &*I; 2144 Defs.clear(); 2145 HBS::getInstrDefs(*MI, Defs); 2146 2147 unsigned Opc = MI->getOpcode(); 2148 if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) 2149 continue; 2150 2151 if (MI->mayStore()) { 2152 bool T = genStoreUpperHalf(MI); 2153 T = T || genStoreImmediate(MI); 2154 Changed |= T; 2155 continue; 2156 } 2157 2158 if (Defs.count() != 1) 2159 continue; 2160 const MachineOperand &Op0 = MI->getOperand(0); 2161 if (!Op0.isReg() || !Op0.isDef()) 2162 continue; 2163 BitTracker::RegisterRef RD = Op0; 2164 if (!BT.has(RD.Reg)) 2165 continue; 2166 const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); 2167 const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg); 2168 2169 if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { 2170 bool T = genPackhl(MI, RD, RC); 2171 Changed |= T; 2172 continue; 2173 } 2174 2175 if (FRC->getID() == Hexagon::IntRegsRegClassID) { 2176 bool T = genExtractHalf(MI, RD, RC); 2177 T = T || genCombineHalf(MI, RD, RC); 2178 T = T || genExtractLow(MI, RD, RC); 2179 Changed |= T; 2180 continue; 2181 } 2182 2183 if (FRC->getID() == Hexagon::PredRegsRegClassID) { 2184 bool T = simplifyTstbit(MI, RD, RC); 2185 Changed |= T; 2186 continue; 2187 } 2188 } 2189 return Changed; 2190 } 2191 2192 2193 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { 2194 auto &HST = MF.getSubtarget<HexagonSubtarget>(); 2195 auto &HRI = *HST.getRegisterInfo(); 2196 auto &HII = *HST.getInstrInfo(); 2197 2198 MDT = &getAnalysis<MachineDominatorTree>(); 2199 MachineRegisterInfo &MRI = MF.getRegInfo(); 2200 bool Changed; 2201 2202 Changed = DeadCodeElimination(MF, *MDT).run(); 2203 2204 const HexagonEvaluator HE(HRI, MRI, HII, MF); 2205 BitTracker BT(HE, MF); 2206 DEBUG(BT.trace(true)); 2207 BT.run(); 2208 2209 MachineBasicBlock &Entry = MF.front(); 2210 2211 RegisterSet AIG; // Available registers for IG. 2212 ConstGeneration ImmG(BT, HII, MRI); 2213 Changed |= visitBlock(Entry, ImmG, AIG); 2214 2215 RegisterSet ARE; // Available registers for RIE. 2216 RedundantInstrElimination RIE(BT, HII, MRI); 2217 Changed |= visitBlock(Entry, RIE, ARE); 2218 2219 RegisterSet ACG; // Available registers for CG. 2220 CopyGeneration CopyG(BT, HII, MRI); 2221 Changed |= visitBlock(Entry, CopyG, ACG); 2222 2223 RegisterSet ACP; // Available registers for CP. 2224 CopyPropagation CopyP(HRI, MRI); 2225 Changed |= visitBlock(Entry, CopyP, ACP); 2226 2227 Changed = DeadCodeElimination(MF, *MDT).run() || Changed; 2228 2229 BT.run(); 2230 RegisterSet ABS; // Available registers for BS. 2231 BitSimplification BitS(BT, HII, MRI); 2232 Changed |= visitBlock(Entry, BitS, ABS); 2233 2234 Changed = DeadCodeElimination(MF, *MDT).run() || Changed; 2235 2236 if (Changed) { 2237 for (auto &B : MF) 2238 for (auto &I : B) 2239 I.clearKillInfo(); 2240 DeadCodeElimination(MF, *MDT).run(); 2241 } 2242 return Changed; 2243 } 2244 2245 2246 // Recognize loops where the code at the end of the loop matches the code 2247 // before the entry of the loop, and the matching code is such that is can 2248 // be simplified. 
This pass relies on the bit simplification above and only 2249 // prepares code in a way that can be handled by the bit simplifcation. 2250 // 2251 // This is the motivating testcase (and explanation): 2252 // 2253 // { 2254 // loop0(.LBB0_2, r1) // %for.body.preheader 2255 // r5:4 = memd(r0++#8) 2256 // } 2257 // { 2258 // r3 = lsr(r4, #16) 2259 // r7:6 = combine(r5, r5) 2260 // } 2261 // { 2262 // r3 = insert(r5, #16, #16) 2263 // r7:6 = vlsrw(r7:6, #16) 2264 // } 2265 // .LBB0_2: 2266 // { 2267 // memh(r2+#4) = r5 2268 // memh(r2+#6) = r6 # R6 is really R5.H 2269 // } 2270 // { 2271 // r2 = add(r2, #8) 2272 // memh(r2+#0) = r4 2273 // memh(r2+#2) = r3 # R3 is really R4.H 2274 // } 2275 // { 2276 // r5:4 = memd(r0++#8) 2277 // } 2278 // { # "Shuffling" code that sets up R3 and R6 2279 // r3 = lsr(r4, #16) # so that their halves can be stored in the 2280 // r7:6 = combine(r5, r5) # next iteration. This could be folded into 2281 // } # the stores if the code was at the beginning 2282 // { # of the loop iteration. Since the same code 2283 // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved 2284 // r7:6 = vlsrw(r7:6, #16) # there. 2285 // }:endloop0 2286 // 2287 // 2288 // The outcome: 2289 // 2290 // { 2291 // loop0(.LBB0_2, r1) 2292 // r5:4 = memd(r0++#8) 2293 // } 2294 // .LBB0_2: 2295 // { 2296 // memh(r2+#4) = r5 2297 // memh(r2+#6) = r5.h 2298 // } 2299 // { 2300 // r2 = add(r2, #8) 2301 // memh(r2+#0) = r4 2302 // memh(r2+#2) = r4.h 2303 // } 2304 // { 2305 // r5:4 = memd(r0++#8) 2306 // }:endloop0 2307 2308 namespace llvm { 2309 FunctionPass *createHexagonLoopRescheduling(); 2310 void initializeHexagonLoopReschedulingPass(PassRegistry&); 2311 } 2312 2313 namespace { 2314 class HexagonLoopRescheduling : public MachineFunctionPass { 2315 public: 2316 static char ID; 2317 HexagonLoopRescheduling() : MachineFunctionPass(ID), 2318 HII(0), HRI(0), MRI(0), BTP(0) { 2319 initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); 2320 } 2321 2322 bool runOnMachineFunction(MachineFunction &MF) override; 2323 2324 private: 2325 const HexagonInstrInfo *HII; 2326 const HexagonRegisterInfo *HRI; 2327 MachineRegisterInfo *MRI; 2328 BitTracker *BTP; 2329 2330 struct LoopCand { 2331 LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, 2332 MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} 2333 MachineBasicBlock *LB, *PB, *EB; 2334 }; 2335 typedef std::vector<MachineInstr*> InstrList; 2336 struct InstrGroup { 2337 BitTracker::RegisterRef Inp, Out; 2338 InstrList Ins; 2339 }; 2340 struct PhiInfo { 2341 PhiInfo(MachineInstr &P, MachineBasicBlock &B); 2342 unsigned DefR; 2343 BitTracker::RegisterRef LR, PR; 2344 MachineBasicBlock *LB, *PB; 2345 }; 2346 2347 static unsigned getDefReg(const MachineInstr *MI); 2348 bool isConst(unsigned Reg) const; 2349 bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; 2350 bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; 2351 bool isShuffleOf(unsigned OutR, unsigned InpR) const; 2352 bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, 2353 unsigned &InpR2) const; 2354 void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, 2355 MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); 2356 bool processLoop(LoopCand &C); 2357 }; 2358 } 2359 2360 char HexagonLoopRescheduling::ID = 0; 2361 2362 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", 2363 "Hexagon Loop Rescheduling", false, false) 2364 2365 2366 
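// Gather the per-PHI information used by processLoop: DefR is the register
// defined by the PHI, LR is the incoming value on the back edge from the
// loop block B itself, and PR/PB are the incoming value and block from
// outside the loop (presumably the preheader).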
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, 2367 MachineBasicBlock &B) { 2368 DefR = HexagonLoopRescheduling::getDefReg(&P); 2369 LB = &B; 2370 PB = nullptr; 2371 for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { 2372 const MachineOperand &OpB = P.getOperand(i+1); 2373 if (OpB.getMBB() == &B) { 2374 LR = P.getOperand(i); 2375 continue; 2376 } 2377 PB = OpB.getMBB(); 2378 PR = P.getOperand(i); 2379 } 2380 } 2381 2382 2383 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { 2384 RegisterSet Defs; 2385 HBS::getInstrDefs(*MI, Defs); 2386 if (Defs.count() != 1) 2387 return 0; 2388 return Defs.find_first(); 2389 } 2390 2391 2392 bool HexagonLoopRescheduling::isConst(unsigned Reg) const { 2393 if (!BTP->has(Reg)) 2394 return false; 2395 const BitTracker::RegisterCell &RC = BTP->lookup(Reg); 2396 for (unsigned i = 0, w = RC.width(); i < w; ++i) { 2397 const BitTracker::BitValue &V = RC[i]; 2398 if (!V.is(0) && !V.is(1)) 2399 return false; 2400 } 2401 return true; 2402 } 2403 2404 2405 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, 2406 unsigned DefR) const { 2407 unsigned Opc = MI->getOpcode(); 2408 switch (Opc) { 2409 case TargetOpcode::COPY: 2410 case Hexagon::S2_lsr_i_r: 2411 case Hexagon::S2_asr_i_r: 2412 case Hexagon::S2_asl_i_r: 2413 case Hexagon::S2_lsr_i_p: 2414 case Hexagon::S2_asr_i_p: 2415 case Hexagon::S2_asl_i_p: 2416 case Hexagon::S2_insert: 2417 case Hexagon::A2_or: 2418 case Hexagon::A2_orp: 2419 case Hexagon::A2_and: 2420 case Hexagon::A2_andp: 2421 case Hexagon::A2_combinew: 2422 case Hexagon::A4_combineri: 2423 case Hexagon::A4_combineir: 2424 case Hexagon::A2_combineii: 2425 case Hexagon::A4_combineii: 2426 case Hexagon::A2_combine_ll: 2427 case Hexagon::A2_combine_lh: 2428 case Hexagon::A2_combine_hl: 2429 case Hexagon::A2_combine_hh: 2430 return true; 2431 } 2432 return false; 2433 } 2434 2435 2436 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, 2437 unsigned InpR) const { 2438 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 2439 const MachineOperand &Op = MI->getOperand(i); 2440 if (!Op.isReg()) 2441 continue; 2442 if (Op.getReg() == InpR) 2443 return i == n-1; 2444 } 2445 return false; 2446 } 2447 2448 2449 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { 2450 if (!BTP->has(OutR) || !BTP->has(InpR)) 2451 return false; 2452 const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); 2453 for (unsigned i = 0, w = OutC.width(); i < w; ++i) { 2454 const BitTracker::BitValue &V = OutC[i]; 2455 if (V.Type != BitTracker::BitValue::Ref) 2456 continue; 2457 if (V.RefI.Reg != InpR) 2458 return false; 2459 } 2460 return true; 2461 } 2462 2463 2464 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, 2465 unsigned OutR2, unsigned &InpR2) const { 2466 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) 2467 return false; 2468 const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); 2469 const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); 2470 unsigned W = OutC1.width(); 2471 unsigned MatchR = 0; 2472 if (W != OutC2.width()) 2473 return false; 2474 for (unsigned i = 0; i < W; ++i) { 2475 const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; 2476 if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) 2477 return false; 2478 if (V1.Type != BitTracker::BitValue::Ref) 2479 continue; 2480 if (V1.RefI.Pos != V2.RefI.Pos) 2481 return false; 2482 if (V1.RefI.Reg != InpR1) 2483 return false; 2484 if (V2.RefI.Reg == 
0 || V2.RefI.Reg == OutR2) 2485 return false; 2486 if (!MatchR) 2487 MatchR = V2.RefI.Reg; 2488 else if (V2.RefI.Reg != MatchR) 2489 return false; 2490 } 2491 InpR2 = MatchR; 2492 return true; 2493 } 2494 2495 2496 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, 2497 MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, 2498 unsigned NewPredR) { 2499 DenseMap<unsigned,unsigned> RegMap; 2500 2501 const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); 2502 unsigned PhiR = MRI->createVirtualRegister(PhiRC); 2503 BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) 2504 .addReg(NewPredR) 2505 .addMBB(&PB) 2506 .addReg(G.Inp.Reg) 2507 .addMBB(&LB); 2508 RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); 2509 2510 for (unsigned i = G.Ins.size(); i > 0; --i) { 2511 const MachineInstr *SI = G.Ins[i-1]; 2512 unsigned DR = getDefReg(SI); 2513 const TargetRegisterClass *RC = MRI->getRegClass(DR); 2514 unsigned NewDR = MRI->createVirtualRegister(RC); 2515 DebugLoc DL = SI->getDebugLoc(); 2516 2517 auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); 2518 for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { 2519 const MachineOperand &Op = SI->getOperand(j); 2520 if (!Op.isReg()) { 2521 MIB.addOperand(Op); 2522 continue; 2523 } 2524 if (!Op.isUse()) 2525 continue; 2526 unsigned UseR = RegMap[Op.getReg()]; 2527 MIB.addReg(UseR, 0, Op.getSubReg()); 2528 } 2529 RegMap.insert(std::make_pair(DR, NewDR)); 2530 } 2531 2532 HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); 2533 } 2534 2535 2536 bool HexagonLoopRescheduling::processLoop(LoopCand &C) { 2537 DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); 2538 std::vector<PhiInfo> Phis; 2539 for (auto &I : *C.LB) { 2540 if (!I.isPHI()) 2541 break; 2542 unsigned PR = getDefReg(&I); 2543 if (isConst(PR)) 2544 continue; 2545 bool BadUse = false, GoodUse = false; 2546 for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) { 2547 MachineInstr *UseI = UI->getParent(); 2548 if (UseI->getParent() != C.LB) { 2549 BadUse = true; 2550 break; 2551 } 2552 if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR)) 2553 GoodUse = true; 2554 } 2555 if (BadUse || !GoodUse) 2556 continue; 2557 2558 Phis.push_back(PhiInfo(I, *C.LB)); 2559 } 2560 2561 DEBUG({ 2562 dbgs() << "Phis: {"; 2563 for (auto &I : Phis) { 2564 dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" 2565 << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() 2566 << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" 2567 << I.LB->getNumber() << ')'; 2568 } 2569 dbgs() << " }\n"; 2570 }); 2571 2572 if (Phis.empty()) 2573 return false; 2574 2575 bool Changed = false; 2576 InstrList ShufIns; 2577 2578 // Go backwards in the block: for each bit shuffling instruction, check 2579 // if that instruction could potentially be moved to the front of the loop: 2580 // the output of the loop cannot be used in a non-shuffling instruction 2581 // in this loop. 
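  // (In the scan below, a use of the defined register is acceptable if it is
  // another candidate shuffling instruction, the back-edge operand of a PHI
  // in this block, or a use outside the loop when an epilog block is
  // available for a copy-out; anything else marks the instruction as
  // unmovable.)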
2582 for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) { 2583 if (I->isTerminator()) 2584 continue; 2585 if (I->isPHI()) 2586 break; 2587 2588 RegisterSet Defs; 2589 HBS::getInstrDefs(*I, Defs); 2590 if (Defs.count() != 1) 2591 continue; 2592 unsigned DefR = Defs.find_first(); 2593 if (!TargetRegisterInfo::isVirtualRegister(DefR)) 2594 continue; 2595 if (!isBitShuffle(&*I, DefR)) 2596 continue; 2597 2598 bool BadUse = false; 2599 for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) { 2600 MachineInstr *UseI = UI->getParent(); 2601 if (UseI->getParent() == C.LB) { 2602 if (UseI->isPHI()) { 2603 // If the use is in a phi node in this loop, then it should be 2604 // the value corresponding to the back edge. 2605 unsigned Idx = UI.getOperandNo(); 2606 if (UseI->getOperand(Idx+1).getMBB() != C.LB) 2607 BadUse = true; 2608 } else { 2609 auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI); 2610 if (F == ShufIns.end()) 2611 BadUse = true; 2612 } 2613 } else { 2614 // There is a use outside of the loop, but there is no epilog block 2615 // suitable for a copy-out. 2616 if (C.EB == nullptr) 2617 BadUse = true; 2618 } 2619 if (BadUse) 2620 break; 2621 } 2622 2623 if (BadUse) 2624 continue; 2625 ShufIns.push_back(&*I); 2626 } 2627 2628 // Partition the list of shuffling instructions into instruction groups, 2629 // where each group has to be moved as a whole (i.e. a group is a chain of 2630 // dependent instructions). A group produces a single live output register, 2631 // which is meant to be the input of the loop phi node (although this is 2632 // not checked here yet). It also uses a single register as its input, 2633 // which is some value produced in the loop body. After moving the group 2634 // to the beginning of the loop, that input register would need to be 2635 // the loop-carried register (through a phi node) instead of the (currently 2636 // loop-carried) output register. 2637 typedef std::vector<InstrGroup> InstrGroupList; 2638 InstrGroupList Groups; 2639 2640 for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { 2641 MachineInstr *SI = ShufIns[i]; 2642 if (SI == nullptr) 2643 continue; 2644 2645 InstrGroup G; 2646 G.Ins.push_back(SI); 2647 G.Out.Reg = getDefReg(SI); 2648 RegisterSet Inputs; 2649 HBS::getInstrUses(*SI, Inputs); 2650 2651 for (unsigned j = i+1; j < n; ++j) { 2652 MachineInstr *MI = ShufIns[j]; 2653 if (MI == nullptr) 2654 continue; 2655 RegisterSet Defs; 2656 HBS::getInstrDefs(*MI, Defs); 2657 // If this instruction does not define any pending inputs, skip it. 2658 if (!Defs.intersects(Inputs)) 2659 continue; 2660 // Otherwise, add it to the current group and remove the inputs that 2661 // are defined by MI. 2662 G.Ins.push_back(MI); 2663 Inputs.remove(Defs); 2664 // Then add all registers used by MI. 2665 HBS::getInstrUses(*MI, Inputs); 2666 ShufIns[j] = nullptr; 2667 } 2668 2669 // Only add a group if it requires at most one register. 
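    // The single remaining register in Inputs (if any) becomes G.Inp, i.e.
    // the value the group consumes from the loop body; moveGroup later feeds
    // it through a new PHI.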
    if (Inputs.count() > 1)
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
      continue;

    G.Inp.Reg = Inputs.find_first();
    Groups.push_back(G);
  }

  DEBUG({
    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
      InstrGroup &G = Groups[i];
      dbgs() << "Group[" << i << "] inp: "
             << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
             << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
      for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
        dbgs() << " " << *G.Ins[j];
    }
  });

  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
    InstrGroup &G = Groups[i];
    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
    if (F == Phis.end())
      continue;
    unsigned PredR = 0;
    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
      const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
      unsigned Opc = DefPredR->getOpcode();
      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
        continue;
      if (!DefPredR->getOperand(1).isImm())
        continue;
      if (DefPredR->getOperand(1).getImm() != 0)
        continue;
      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
      if (RC != MRI->getRegClass(F->PR.Reg)) {
        PredR = MRI->createVirtualRegister(RC);
        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
                                                          : Hexagon::A2_tfrpi;
        auto T = C.PB->getFirstTerminator();
        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
        BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
          .addImm(0);
      } else {
        PredR = F->PR.Reg;
      }
    }
    assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
    Changed = true;
  }

  return Changed;
}


bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
  BitTracker BT(HE, MF);
  DEBUG(BT.trace(true));
  BT.run();
  BTP = &BT;

  std::vector<LoopCand> Cand;

  for (auto &B : MF) {
    if (B.pred_size() != 2 || B.succ_size() != 2)
      continue;
    MachineBasicBlock *PB = nullptr;
    bool IsLoop = false;
    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
      if (*PI != &B)
        PB = *PI;
      else
        IsLoop = true;
    }
    if (!IsLoop)
      continue;

    MachineBasicBlock *EB = nullptr;
    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
      if (*SI == &B)
        continue;
      // Set EB to the epilog block, if it has only one predecessor (i.e. the
      // edge from B to EB is non-critical).
2768 if ((*SI)->pred_size() == 1) 2769 EB = *SI; 2770 break; 2771 } 2772 2773 Cand.push_back(LoopCand(&B, PB, EB)); 2774 } 2775 2776 bool Changed = false; 2777 for (auto &C : Cand) 2778 Changed |= processLoop(C); 2779 2780 return Changed; 2781 } 2782 2783 //===----------------------------------------------------------------------===// 2784 // Public Constructor Functions 2785 //===----------------------------------------------------------------------===// 2786 2787 FunctionPass *llvm::createHexagonLoopRescheduling() { 2788 return new HexagonLoopRescheduling(); 2789 } 2790 2791 FunctionPass *llvm::createHexagonBitSimplify() { 2792 return new HexagonBitSimplify(); 2793 } 2794 2795