//===- R600MergeVectorRegisters.cpp ---------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
///
/// For instance let's consider the following pseudo code:
///   %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
///   ...
///   %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
///   (swizzlable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
///   ...
///   %7 = INSERT_SUBREG %4, sub3
///   (swizzlable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce MOV count.
28 //===----------------------------------------------------------------------===// 29 30 #include "AMDGPU.h" 31 #include "AMDGPUSubtarget.h" 32 #include "R600Defines.h" 33 #include "R600InstrInfo.h" 34 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 35 #include "llvm/ADT/DenseMap.h" 36 #include "llvm/ADT/STLExtras.h" 37 #include "llvm/ADT/StringRef.h" 38 #include "llvm/CodeGen/MachineBasicBlock.h" 39 #include "llvm/CodeGen/MachineDominators.h" 40 #include "llvm/CodeGen/MachineFunction.h" 41 #include "llvm/CodeGen/MachineFunctionPass.h" 42 #include "llvm/CodeGen/MachineInstr.h" 43 #include "llvm/CodeGen/MachineInstrBuilder.h" 44 #include "llvm/CodeGen/MachineLoopInfo.h" 45 #include "llvm/CodeGen/MachineOperand.h" 46 #include "llvm/CodeGen/MachineRegisterInfo.h" 47 #include "llvm/IR/DebugLoc.h" 48 #include "llvm/Pass.h" 49 #include "llvm/Support/Debug.h" 50 #include "llvm/Support/ErrorHandling.h" 51 #include "llvm/Support/raw_ostream.h" 52 #include <cassert> 53 #include <utility> 54 #include <vector> 55 56 using namespace llvm; 57 58 #define DEBUG_TYPE "vec-merger" 59 60 static bool 61 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 62 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 63 E = MRI.def_instr_end(); It != E; ++It) { 64 return (*It).isImplicitDef(); 65 } 66 if (MRI.isReserved(Reg)) { 67 return false; 68 } 69 llvm_unreachable("Reg without a def"); 70 return false; 71 } 72 73 namespace { 74 75 class RegSeqInfo { 76 public: 77 MachineInstr *Instr; 78 DenseMap<unsigned, unsigned> RegToChan; 79 std::vector<unsigned> UndefReg; 80 81 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 82 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 83 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 84 MachineOperand &MO = Instr->getOperand(i); 85 unsigned Chan = Instr->getOperand(i + 1).getImm(); 86 if (isImplicitlyDef(MRI, MO.getReg())) 87 UndefReg.push_back(Chan); 88 else 89 RegToChan[MO.getReg()] = Chan; 
90 } 91 } 92 93 RegSeqInfo() = default; 94 95 bool operator==(const RegSeqInfo &RSI) const { 96 return RSI.Instr == Instr; 97 } 98 }; 99 100 class R600VectorRegMerger : public MachineFunctionPass { 101 private: 102 using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>; 103 104 MachineRegisterInfo *MRI; 105 const R600InstrInfo *TII = nullptr; 106 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 107 InstructionSetMap PreviousRegSeqByReg; 108 InstructionSetMap PreviousRegSeqByUndefCount; 109 110 bool canSwizzle(const MachineInstr &MI) const; 111 bool areAllUsesSwizzeable(unsigned Reg) const; 112 void SwizzleInput(MachineInstr &, 113 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 114 bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge, 115 std::vector<std::pair<unsigned, unsigned>> &Remap) const; 116 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 117 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 118 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 119 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 120 MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec, 121 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 122 void RemoveMI(MachineInstr *); 123 void trackRSI(const RegSeqInfo &RSI); 124 125 public: 126 static char ID; 127 128 R600VectorRegMerger() : MachineFunctionPass(ID) {} 129 130 void getAnalysisUsage(AnalysisUsage &AU) const override { 131 AU.setPreservesCFG(); 132 AU.addRequired<MachineDominatorTree>(); 133 AU.addPreserved<MachineDominatorTree>(); 134 AU.addRequired<MachineLoopInfo>(); 135 AU.addPreserved<MachineLoopInfo>(); 136 MachineFunctionPass::getAnalysisUsage(AU); 137 } 138 139 StringRef getPassName() const override { 140 return "R600 Vector Registers Merge Pass"; 141 } 142 143 bool runOnMachineFunction(MachineFunction &Fn) override; 144 }; 145 146 } // end anonymous namespace 147 148 
INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE, 149 "R600 Vector Reg Merger", false, false) 150 INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE, 151 "R600 Vector Reg Merger", false, false) 152 153 char R600VectorRegMerger::ID = 0; 154 155 char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID; 156 157 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 158 const { 159 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 160 return true; 161 switch (MI.getOpcode()) { 162 case AMDGPU::R600_ExportSwz: 163 case AMDGPU::EG_ExportSwz: 164 return true; 165 default: 166 return false; 167 } 168 } 169 170 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 171 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap) 172 const { 173 unsigned CurrentUndexIdx = 0; 174 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 175 E = ToMerge->RegToChan.end(); It != E; ++It) { 176 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 177 Untouched->RegToChan.find((*It).first); 178 if (PosInUntouched != Untouched->RegToChan.end()) { 179 Remap.push_back(std::pair<unsigned, unsigned> 180 ((*It).second, (*PosInUntouched).second)); 181 continue; 182 } 183 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 184 return false; 185 Remap.push_back(std::pair<unsigned, unsigned> 186 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 187 } 188 189 return true; 190 } 191 192 static 193 unsigned getReassignedChan( 194 const std::vector<std::pair<unsigned, unsigned>> &RemapChan, 195 unsigned Chan) { 196 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 197 if (RemapChan[j].first == Chan) 198 return RemapChan[j].second; 199 } 200 llvm_unreachable("Chan wasn't reassigned"); 201 } 202 203 MachineInstr *R600VectorRegMerger::RebuildVector( 204 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 205 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 206 unsigned Reg = 
RSI->Instr->getOperand(0).getReg(); 207 MachineBasicBlock::iterator Pos = RSI->Instr; 208 MachineBasicBlock &MBB = *Pos->getParent(); 209 DebugLoc DL = Pos->getDebugLoc(); 210 211 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 212 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 213 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 214 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 215 E = RSI->RegToChan.end(); It != E; ++It) { 216 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 217 unsigned SubReg = (*It).first; 218 unsigned Swizzle = (*It).second; 219 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 220 221 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 222 DstReg) 223 .addReg(SrcVec) 224 .addReg(SubReg) 225 .addImm(Chan); 226 UpdatedRegToChan[SubReg] = Chan; 227 std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan); 228 if (ChanPos != UpdatedUndef.end()) 229 UpdatedUndef.erase(ChanPos); 230 assert(!is_contained(UpdatedUndef, Chan) && 231 "UpdatedUndef shouldn't contain Chan more than once!"); 232 LLVM_DEBUG(dbgs() << " ->"; Tmp->dump();); 233 (void)Tmp; 234 SrcVec = DstReg; 235 } 236 MachineInstr *NewMI = 237 BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); 238 LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); 239 240 LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); 241 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 242 E = MRI->use_instr_end(); It != E; ++It) { 243 LLVM_DEBUG(dbgs() << " "; (*It).dump(); dbgs() << " ->"); 244 SwizzleInput(*It, RemapChan); 245 LLVM_DEBUG((*It).dump()); 246 } 247 RSI->Instr->eraseFromParent(); 248 249 // Update RSI 250 RSI->Instr = NewMI; 251 RSI->RegToChan = UpdatedRegToChan; 252 RSI->UndefReg = UpdatedUndef; 253 254 return NewMI; 255 } 256 257 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 258 for (InstructionSetMap::iterator It = 
PreviousRegSeqByReg.begin(), 259 E = PreviousRegSeqByReg.end(); It != E; ++It) { 260 std::vector<MachineInstr *> &MIs = (*It).second; 261 MIs.erase(llvm::find(MIs, MI), MIs.end()); 262 } 263 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 264 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 265 std::vector<MachineInstr *> &MIs = (*It).second; 266 MIs.erase(llvm::find(MIs, MI), MIs.end()); 267 } 268 } 269 270 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 271 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 272 unsigned Offset; 273 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 274 Offset = 2; 275 else 276 Offset = 3; 277 for (unsigned i = 0; i < 4; i++) { 278 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 279 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 280 if (RemapChan[j].first == Swizzle) { 281 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 282 break; 283 } 284 } 285 } 286 } 287 288 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 289 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 290 E = MRI->use_instr_end(); It != E; ++It) { 291 if (!canSwizzle(*It)) 292 return false; 293 } 294 return true; 295 } 296 297 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 298 RegSeqInfo &CompatibleRSI, 299 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 300 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 301 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 302 if (!MOp->isReg()) 303 continue; 304 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 305 continue; 306 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 307 CompatibleRSI = PreviousRegSeq[MI]; 308 if (RSI == CompatibleRSI) 309 continue; 310 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 311 return true; 312 } 313 } 314 return false; 315 } 316 317 bool 
R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 318 RegSeqInfo &CompatibleRSI, 319 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 320 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 321 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 322 return false; 323 std::vector<MachineInstr *> &MIs = 324 PreviousRegSeqByUndefCount[NeededUndefs]; 325 CompatibleRSI = PreviousRegSeq[MIs.back()]; 326 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 327 return true; 328 } 329 330 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 331 for (DenseMap<unsigned, unsigned>::const_iterator 332 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 333 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 334 } 335 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 336 PreviousRegSeq[RSI.Instr] = RSI; 337 } 338 339 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 340 if (skipFunction(Fn.getFunction())) 341 return false; 342 343 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 344 TII = ST.getInstrInfo(); 345 MRI = &Fn.getRegInfo(); 346 347 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 348 MBB != MBBe; ++MBB) { 349 MachineBasicBlock *MB = &*MBB; 350 PreviousRegSeq.clear(); 351 PreviousRegSeqByReg.clear(); 352 PreviousRegSeqByUndefCount.clear(); 353 354 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 355 MII != MIIE; ++MII) { 356 MachineInstr &MI = *MII; 357 if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { 358 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 359 unsigned Reg = MI.getOperand(1).getReg(); 360 for (MachineRegisterInfo::def_instr_iterator 361 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 362 It != E; ++It) { 363 RemoveMI(&(*It)); 364 } 365 } 366 continue; 367 } 368 369 RegSeqInfo RSI(*MRI, &MI); 370 371 // All uses of MI are swizzeable ? 
372 unsigned Reg = MI.getOperand(0).getReg(); 373 if (!areAllUsesSwizzeable(Reg)) 374 continue; 375 376 LLVM_DEBUG({ 377 dbgs() << "Trying to optimize "; 378 MI.dump(); 379 }); 380 381 RegSeqInfo CandidateRSI; 382 std::vector<std::pair<unsigned, unsigned>> RemapChan; 383 LLVM_DEBUG(dbgs() << "Using common slots...\n";); 384 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 385 // Remove CandidateRSI mapping 386 RemoveMI(CandidateRSI.Instr); 387 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 388 trackRSI(RSI); 389 continue; 390 } 391 LLVM_DEBUG(dbgs() << "Using free slots...\n";); 392 RemapChan.clear(); 393 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 394 RemoveMI(CandidateRSI.Instr); 395 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 396 trackRSI(RSI); 397 continue; 398 } 399 //Failed to merge 400 trackRSI(RSI); 401 } 402 } 403 return false; 404 } 405 406 llvm::FunctionPass *llvm::createR600VectorRegMerger() { 407 return new R600VectorRegMerger(); 408 } 409