1 //===--------------------- R600MergeVectorRegisters.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This pass merges inputs of swizzeable instructions into vector sharing 12 /// common data and/or have enough undef subreg using swizzle abilities. 13 /// 14 /// For instance let's consider the following pseudo code : 15 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 16 /// ... 17 /// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3 18 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3 19 /// 20 /// is turned into : 21 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 22 /// ... 23 /// vreg7<def> = INSERT_SUBREG vreg4, sub3 24 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3 25 /// 26 /// This allow regalloc to reduce register pressure for vector registers and 27 /// to reduce MOV count. 28 //===----------------------------------------------------------------------===// 29 30 #include "AMDGPU.h" 31 #include "AMDGPUSubtarget.h" 32 #include "R600Defines.h" 33 #include "R600InstrInfo.h" 34 #include "llvm/CodeGen/DFAPacketizer.h" 35 #include "llvm/CodeGen/MachineDominators.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstrBuilder.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineRegisterInfo.h" 40 #include "llvm/CodeGen/Passes.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "vec-merger" 47 48 namespace { 49 50 static bool 51 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 52 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 53 E = MRI.def_instr_end(); It != E; ++It) { 54 return (*It).isImplicitDef(); 55 } 56 if (MRI.isReserved(Reg)) { 57 return false; 58 } 59 llvm_unreachable("Reg without a def"); 60 return false; 61 } 62 63 class RegSeqInfo { 64 public: 65 MachineInstr *Instr; 66 DenseMap<unsigned, unsigned> RegToChan; 67 std::vector<unsigned> UndefReg; 68 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 69 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 70 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 71 MachineOperand &MO = Instr->getOperand(i); 72 unsigned Chan = Instr->getOperand(i + 1).getImm(); 73 if (isImplicitlyDef(MRI, MO.getReg())) 74 UndefReg.push_back(Chan); 75 else 76 RegToChan[MO.getReg()] = Chan; 77 } 78 } 79 RegSeqInfo() {} 80 81 bool operator==(const RegSeqInfo &RSI) const { 82 return RSI.Instr == Instr; 83 } 84 }; 85 86 class R600VectorRegMerger : public MachineFunctionPass { 87 private: 88 MachineRegisterInfo *MRI; 89 const R600InstrInfo *TII; 90 bool canSwizzle(const MachineInstr &) const; 91 bool areAllUsesSwizzeable(unsigned Reg) const; 92 void SwizzleInput(MachineInstr &, 93 const std::vector<std::pair<unsigned, unsigned> > &) const; 94 bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 95 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 96 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 97 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 98 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 99 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 100 MachineInstr *RebuildVector(RegSeqInfo *MI, 101 const RegSeqInfo *BaseVec, 102 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 103 void RemoveMI(MachineInstr *); 104 void trackRSI(const RegSeqInfo &RSI); 105 106 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 107 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 108 InstructionSetMap PreviousRegSeqByReg; 109 InstructionSetMap PreviousRegSeqByUndefCount; 110 public: 111 static char ID; 112 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 113 TII(nullptr) { } 114 115 void getAnalysisUsage(AnalysisUsage &AU) const override { 116 AU.setPreservesCFG(); 117 AU.addRequired<MachineDominatorTree>(); 118 AU.addPreserved<MachineDominatorTree>(); 119 AU.addRequired<MachineLoopInfo>(); 120 AU.addPreserved<MachineLoopInfo>(); 121 MachineFunctionPass::getAnalysisUsage(AU); 122 } 123 124 StringRef getPassName() const override { 125 return "R600 Vector Registers Merge Pass"; 126 } 127 128 bool runOnMachineFunction(MachineFunction &Fn) override; 129 }; 130 131 char R600VectorRegMerger::ID = 0; 132 133 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 134 const { 135 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 136 return true; 137 switch (MI.getOpcode()) { 138 case AMDGPU::R600_ExportSwz: 139 case AMDGPU::EG_ExportSwz: 140 return true; 141 default: 142 return false; 143 } 144 } 145 146 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 147 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 148 const { 149 unsigned CurrentUndexIdx = 0; 150 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 151 E = ToMerge->RegToChan.end(); It != E; ++It) { 152 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 153 Untouched->RegToChan.find((*It).first); 154 if (PosInUntouched != Untouched->RegToChan.end()) { 155 Remap.push_back(std::pair<unsigned, unsigned> 156 ((*It).second, (*PosInUntouched).second)); 157 continue; 158 } 159 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 160 return false; 161 Remap.push_back(std::pair<unsigned, unsigned> 162 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 163 } 164 165 return true; 166 } 167 168 static 169 unsigned getReassignedChan( 170 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 171 unsigned Chan) { 172 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 173 if (RemapChan[j].first == Chan) 174 return RemapChan[j].second; 175 } 176 llvm_unreachable("Chan wasn't reassigned"); 177 } 178 179 MachineInstr *R600VectorRegMerger::RebuildVector( 180 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 181 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 182 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 183 MachineBasicBlock::iterator Pos = RSI->Instr; 184 MachineBasicBlock &MBB = *Pos->getParent(); 185 DebugLoc DL = Pos->getDebugLoc(); 186 187 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 188 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 189 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 190 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 191 E = RSI->RegToChan.end(); It != E; ++It) { 192 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 193 unsigned SubReg = (*It).first; 194 unsigned Swizzle = (*It).second; 195 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 196 197 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 198 DstReg) 199 .addReg(SrcVec) 200 .addReg(SubReg) 201 .addImm(Chan); 202 UpdatedRegToChan[SubReg] = Chan; 203 std::vector<unsigned>::iterator ChanPos = find(UpdatedUndef, Chan); 204 if (ChanPos != UpdatedUndef.end()) 205 UpdatedUndef.erase(ChanPos); 206 assert(!is_contained(UpdatedUndef, Chan) && 207 "UpdatedUndef shouldn't contain Chan more than once!"); 208 DEBUG(dbgs() << " ->"; Tmp->dump();); 209 (void)Tmp; 210 SrcVec = DstReg; 211 } 212 MachineInstr *NewMI = 213 BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); 214 DEBUG(dbgs() << " ->"; NewMI->dump();); 215 216 DEBUG(dbgs() << " Updating Swizzle:\n"); 217 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 218 E = MRI->use_instr_end(); It != E; ++It) { 219 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 220 SwizzleInput(*It, RemapChan); 221 DEBUG((*It).dump()); 222 } 223 RSI->Instr->eraseFromParent(); 224 225 // Update RSI 226 RSI->Instr = NewMI; 227 RSI->RegToChan = UpdatedRegToChan; 228 RSI->UndefReg = UpdatedUndef; 229 230 return NewMI; 231 } 232 233 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 234 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 235 E = PreviousRegSeqByReg.end(); It != E; ++It) { 236 std::vector<MachineInstr *> &MIs = (*It).second; 237 MIs.erase(find(MIs, MI), MIs.end()); 238 } 239 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 240 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 241 std::vector<MachineInstr *> &MIs = (*It).second; 242 MIs.erase(find(MIs, MI), MIs.end()); 243 } 244 } 245 246 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 247 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 248 unsigned Offset; 249 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 250 Offset = 2; 251 else 252 Offset = 3; 253 for (unsigned i = 0; i < 4; i++) { 254 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 255 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 256 if (RemapChan[j].first == Swizzle) { 257 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 258 break; 259 } 260 } 261 } 262 } 263 264 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 265 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 266 E = MRI->use_instr_end(); It != E; ++It) { 267 if (!canSwizzle(*It)) 268 return false; 269 } 270 return true; 271 } 272 273 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 274 RegSeqInfo &CompatibleRSI, 275 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 276 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 277 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 278 if (!MOp->isReg()) 279 continue; 280 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 281 continue; 282 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 283 CompatibleRSI = PreviousRegSeq[MI]; 284 if (RSI == CompatibleRSI) 285 continue; 286 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 287 return true; 288 } 289 } 290 return false; 291 } 292 293 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 294 RegSeqInfo &CompatibleRSI, 295 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 296 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 297 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 298 return false; 299 std::vector<MachineInstr *> &MIs = 300 PreviousRegSeqByUndefCount[NeededUndefs]; 301 CompatibleRSI = PreviousRegSeq[MIs.back()]; 302 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 303 return true; 304 } 305 306 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 307 for (DenseMap<unsigned, unsigned>::const_iterator 308 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 309 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 310 } 311 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 312 PreviousRegSeq[RSI.Instr] = RSI; 313 } 314 315 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 316 if (skipFunction(*Fn.getFunction())) 317 return false; 318 319 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 320 TII = ST.getInstrInfo(); 321 MRI = &Fn.getRegInfo(); 322 323 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 324 MBB != MBBe; ++MBB) { 325 MachineBasicBlock *MB = &*MBB; 326 PreviousRegSeq.clear(); 327 PreviousRegSeqByReg.clear(); 328 PreviousRegSeqByUndefCount.clear(); 329 330 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 331 MII != MIIE; ++MII) { 332 MachineInstr &MI = *MII; 333 if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { 334 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 335 unsigned Reg = MI.getOperand(1).getReg(); 336 for (MachineRegisterInfo::def_instr_iterator 337 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 338 It != E; ++It) { 339 RemoveMI(&(*It)); 340 } 341 } 342 continue; 343 } 344 345 RegSeqInfo RSI(*MRI, &MI); 346 347 // All uses of MI are swizzeable ? 348 unsigned Reg = MI.getOperand(0).getReg(); 349 if (!areAllUsesSwizzeable(Reg)) 350 continue; 351 352 DEBUG({ 353 dbgs() << "Trying to optimize "; 354 MI.dump(); 355 }); 356 357 RegSeqInfo CandidateRSI; 358 std::vector<std::pair<unsigned, unsigned> > RemapChan; 359 DEBUG(dbgs() << "Using common slots...\n";); 360 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 361 // Remove CandidateRSI mapping 362 RemoveMI(CandidateRSI.Instr); 363 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 364 trackRSI(RSI); 365 continue; 366 } 367 DEBUG(dbgs() << "Using free slots...\n";); 368 RemapChan.clear(); 369 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 370 RemoveMI(CandidateRSI.Instr); 371 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 372 trackRSI(RSI); 373 continue; 374 } 375 //Failed to merge 376 trackRSI(RSI); 377 } 378 } 379 return false; 380 } 381 382 } 383 384 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 385 return new R600VectorRegMerger(tm); 386 } 387