//===-- SIFoldOperands.cpp - Fold operands ----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// Fold the source operands of mov and copy instructions into their uses
/// where possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
    UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

static bool isSafeToFold(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.
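  // Folding to or from a physical register is not handled yet, so report
  // failure and the caller keeps the original operand.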

  return false;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
        MRI.getRegClass(DestReg) :
        TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
         RSUse = MRI.use_begin(RegSeqDstReg),
         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target-independent nodes. Target-independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities. The shrink operands pass
  // already does this.
  return;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}