1*790a779fSJames Molloy //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// 2*790a779fSJames Molloy // 3*790a779fSJames Molloy // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*790a779fSJames Molloy // See https://llvm.org/LICENSE.txt for license information. 5*790a779fSJames Molloy // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*790a779fSJames Molloy // 7*790a779fSJames Molloy //===----------------------------------------------------------------------===// 8*790a779fSJames Molloy 9*790a779fSJames Molloy #include "llvm/CodeGen/ModuloSchedule.h" 10*790a779fSJames Molloy #include "llvm/CodeGen/LiveIntervals.h" 11*790a779fSJames Molloy #include "llvm/CodeGen/MachineInstrBuilder.h" 12*790a779fSJames Molloy #include "llvm/CodeGen/TargetInstrInfo.h" 13*790a779fSJames Molloy #include "llvm/Support/Debug.h" 14*790a779fSJames Molloy 15*790a779fSJames Molloy #define DEBUG_TYPE "pipeliner" 16*790a779fSJames Molloy using namespace llvm; 17*790a779fSJames Molloy 18*790a779fSJames Molloy /// Return the register values for the operands of a Phi instruction. 19*790a779fSJames Molloy /// This function assume the instruction is a Phi. 20*790a779fSJames Molloy static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, 21*790a779fSJames Molloy unsigned &InitVal, unsigned &LoopVal) { 22*790a779fSJames Molloy assert(Phi.isPHI() && "Expecting a Phi."); 23*790a779fSJames Molloy 24*790a779fSJames Molloy InitVal = 0; 25*790a779fSJames Molloy LoopVal = 0; 26*790a779fSJames Molloy for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 27*790a779fSJames Molloy if (Phi.getOperand(i + 1).getMBB() != Loop) 28*790a779fSJames Molloy InitVal = Phi.getOperand(i).getReg(); 29*790a779fSJames Molloy else 30*790a779fSJames Molloy LoopVal = Phi.getOperand(i).getReg(); 31*790a779fSJames Molloy 32*790a779fSJames Molloy assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); 33*790a779fSJames Molloy } 34*790a779fSJames Molloy 35*790a779fSJames Molloy /// Return the Phi register value that comes from the incoming block. 36*790a779fSJames Molloy static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { 37*790a779fSJames Molloy for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 38*790a779fSJames Molloy if (Phi.getOperand(i + 1).getMBB() != LoopBB) 39*790a779fSJames Molloy return Phi.getOperand(i).getReg(); 40*790a779fSJames Molloy return 0; 41*790a779fSJames Molloy } 42*790a779fSJames Molloy 43*790a779fSJames Molloy /// Return the Phi register value that comes the loop block. 44*790a779fSJames Molloy static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { 45*790a779fSJames Molloy for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 46*790a779fSJames Molloy if (Phi.getOperand(i + 1).getMBB() == LoopBB) 47*790a779fSJames Molloy return Phi.getOperand(i).getReg(); 48*790a779fSJames Molloy return 0; 49*790a779fSJames Molloy } 50*790a779fSJames Molloy 51*790a779fSJames Molloy void ModuloScheduleExpander::expand() { 52*790a779fSJames Molloy BB = Schedule.getLoop()->getTopBlock(); 53*790a779fSJames Molloy Preheader = *BB->pred_begin(); 54*790a779fSJames Molloy if (Preheader == BB) 55*790a779fSJames Molloy Preheader = *std::next(BB->pred_begin()); 56*790a779fSJames Molloy 57*790a779fSJames Molloy // Iterate over the definitions in each instruction, and compute the 58*790a779fSJames Molloy // stage difference for each use. Keep the maximum value. 59*790a779fSJames Molloy for (MachineInstr *MI : Schedule.getInstructions()) { 60*790a779fSJames Molloy int DefStage = Schedule.getStage(MI); 61*790a779fSJames Molloy for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { 62*790a779fSJames Molloy MachineOperand &Op = MI->getOperand(i); 63*790a779fSJames Molloy if (!Op.isReg() || !Op.isDef()) 64*790a779fSJames Molloy continue; 65*790a779fSJames Molloy 66*790a779fSJames Molloy Register Reg = Op.getReg(); 67*790a779fSJames Molloy unsigned MaxDiff = 0; 68*790a779fSJames Molloy bool PhiIsSwapped = false; 69*790a779fSJames Molloy for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), 70*790a779fSJames Molloy EI = MRI.use_end(); 71*790a779fSJames Molloy UI != EI; ++UI) { 72*790a779fSJames Molloy MachineOperand &UseOp = *UI; 73*790a779fSJames Molloy MachineInstr *UseMI = UseOp.getParent(); 74*790a779fSJames Molloy int UseStage = Schedule.getStage(UseMI); 75*790a779fSJames Molloy unsigned Diff = 0; 76*790a779fSJames Molloy if (UseStage != -1 && UseStage >= DefStage) 77*790a779fSJames Molloy Diff = UseStage - DefStage; 78*790a779fSJames Molloy if (MI->isPHI()) { 79*790a779fSJames Molloy if (isLoopCarried(*MI)) 80*790a779fSJames Molloy ++Diff; 81*790a779fSJames Molloy else 82*790a779fSJames Molloy PhiIsSwapped = true; 83*790a779fSJames Molloy } 84*790a779fSJames Molloy MaxDiff = std::max(Diff, MaxDiff); 85*790a779fSJames Molloy } 86*790a779fSJames Molloy RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); 87*790a779fSJames Molloy } 88*790a779fSJames Molloy } 89*790a779fSJames Molloy 90*790a779fSJames Molloy generatePipelinedLoop(); 91*790a779fSJames Molloy } 92*790a779fSJames Molloy 93*790a779fSJames Molloy void ModuloScheduleExpander::generatePipelinedLoop() { 94*790a779fSJames Molloy // Create a new basic block for the kernel and add it to the CFG. 95*790a779fSJames Molloy MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); 96*790a779fSJames Molloy 97*790a779fSJames Molloy unsigned MaxStageCount = Schedule.getNumStages() - 1; 98*790a779fSJames Molloy 99*790a779fSJames Molloy // Remember the registers that are used in different stages. The index is 100*790a779fSJames Molloy // the iteration, or stage, that the instruction is scheduled in. This is 101*790a779fSJames Molloy // a map between register names in the original block and the names created 102*790a779fSJames Molloy // in each stage of the pipelined loop. 103*790a779fSJames Molloy ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; 104*790a779fSJames Molloy InstrMapTy InstrMap; 105*790a779fSJames Molloy 106*790a779fSJames Molloy SmallVector<MachineBasicBlock *, 4> PrologBBs; 107*790a779fSJames Molloy 108*790a779fSJames Molloy // Generate the prolog instructions that set up the pipeline. 109*790a779fSJames Molloy generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); 110*790a779fSJames Molloy MF.insert(BB->getIterator(), KernelBB); 111*790a779fSJames Molloy 112*790a779fSJames Molloy // Rearrange the instructions to generate the new, pipelined loop, 113*790a779fSJames Molloy // and update register names as needed. 114*790a779fSJames Molloy for (MachineInstr *CI : Schedule.getInstructions()) { 115*790a779fSJames Molloy if (CI->isPHI()) 116*790a779fSJames Molloy continue; 117*790a779fSJames Molloy unsigned StageNum = Schedule.getStage(CI); 118*790a779fSJames Molloy MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum); 119*790a779fSJames Molloy updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap); 120*790a779fSJames Molloy KernelBB->push_back(NewMI); 121*790a779fSJames Molloy InstrMap[NewMI] = CI; 122*790a779fSJames Molloy } 123*790a779fSJames Molloy 124*790a779fSJames Molloy // Copy any terminator instructions to the new kernel, and update 125*790a779fSJames Molloy // names as needed. 126*790a779fSJames Molloy for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), 127*790a779fSJames Molloy E = BB->instr_end(); 128*790a779fSJames Molloy I != E; ++I) { 129*790a779fSJames Molloy MachineInstr *NewMI = MF.CloneMachineInstr(&*I); 130*790a779fSJames Molloy updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); 131*790a779fSJames Molloy KernelBB->push_back(NewMI); 132*790a779fSJames Molloy InstrMap[NewMI] = &*I; 133*790a779fSJames Molloy } 134*790a779fSJames Molloy 135*790a779fSJames Molloy KernelBB->transferSuccessors(BB); 136*790a779fSJames Molloy KernelBB->replaceSuccessor(BB, KernelBB); 137*790a779fSJames Molloy 138*790a779fSJames Molloy generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, 139*790a779fSJames Molloy InstrMap, MaxStageCount, MaxStageCount, false); 140*790a779fSJames Molloy generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, 141*790a779fSJames Molloy MaxStageCount, MaxStageCount, false); 142*790a779fSJames Molloy 143*790a779fSJames Molloy LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); 144*790a779fSJames Molloy 145*790a779fSJames Molloy SmallVector<MachineBasicBlock *, 4> EpilogBBs; 146*790a779fSJames Molloy // Generate the epilog instructions to complete the pipeline. 147*790a779fSJames Molloy generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); 148*790a779fSJames Molloy 149*790a779fSJames Molloy // We need this step because the register allocation doesn't handle some 150*790a779fSJames Molloy // situations well, so we insert copies to help out. 151*790a779fSJames Molloy splitLifetimes(KernelBB, EpilogBBs); 152*790a779fSJames Molloy 153*790a779fSJames Molloy // Remove dead instructions due to loop induction variables. 154*790a779fSJames Molloy removeDeadInstructions(KernelBB, EpilogBBs); 155*790a779fSJames Molloy 156*790a779fSJames Molloy // Add branches between prolog and epilog blocks. 157*790a779fSJames Molloy addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); 158*790a779fSJames Molloy 159*790a779fSJames Molloy // Remove the original loop since it's no longer referenced. 160*790a779fSJames Molloy for (auto &I : *BB) 161*790a779fSJames Molloy LIS.RemoveMachineInstrFromMaps(I); 162*790a779fSJames Molloy BB->clear(); 163*790a779fSJames Molloy BB->eraseFromParent(); 164*790a779fSJames Molloy 165*790a779fSJames Molloy delete[] VRMap; 166*790a779fSJames Molloy } 167*790a779fSJames Molloy 168*790a779fSJames Molloy /// Generate the pipeline prolog code. 169*790a779fSJames Molloy void ModuloScheduleExpander::generateProlog(unsigned LastStage, 170*790a779fSJames Molloy MachineBasicBlock *KernelBB, 171*790a779fSJames Molloy ValueMapTy *VRMap, 172*790a779fSJames Molloy MBBVectorTy &PrologBBs) { 173*790a779fSJames Molloy MachineBasicBlock *PredBB = Preheader; 174*790a779fSJames Molloy InstrMapTy InstrMap; 175*790a779fSJames Molloy 176*790a779fSJames Molloy // Generate a basic block for each stage, not including the last stage, 177*790a779fSJames Molloy // which will be generated in the kernel. Each basic block may contain 178*790a779fSJames Molloy // instructions from multiple stages/iterations. 179*790a779fSJames Molloy for (unsigned i = 0; i < LastStage; ++i) { 180*790a779fSJames Molloy // Create and insert the prolog basic block prior to the original loop 181*790a779fSJames Molloy // basic block. The original loop is removed later. 182*790a779fSJames Molloy MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); 183*790a779fSJames Molloy PrologBBs.push_back(NewBB); 184*790a779fSJames Molloy MF.insert(BB->getIterator(), NewBB); 185*790a779fSJames Molloy NewBB->transferSuccessors(PredBB); 186*790a779fSJames Molloy PredBB->addSuccessor(NewBB); 187*790a779fSJames Molloy PredBB = NewBB; 188*790a779fSJames Molloy 189*790a779fSJames Molloy // Generate instructions for each appropriate stage. Process instructions 190*790a779fSJames Molloy // in original program order. 191*790a779fSJames Molloy for (int StageNum = i; StageNum >= 0; --StageNum) { 192*790a779fSJames Molloy for (MachineBasicBlock::iterator BBI = BB->instr_begin(), 193*790a779fSJames Molloy BBE = BB->getFirstTerminator(); 194*790a779fSJames Molloy BBI != BBE; ++BBI) { 195*790a779fSJames Molloy if (Schedule.getStage(&*BBI) == StageNum) { 196*790a779fSJames Molloy if (BBI->isPHI()) 197*790a779fSJames Molloy continue; 198*790a779fSJames Molloy MachineInstr *NewMI = 199*790a779fSJames Molloy cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum); 200*790a779fSJames Molloy updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap); 201*790a779fSJames Molloy NewBB->push_back(NewMI); 202*790a779fSJames Molloy InstrMap[NewMI] = &*BBI; 203*790a779fSJames Molloy } 204*790a779fSJames Molloy } 205*790a779fSJames Molloy } 206*790a779fSJames Molloy rewritePhiValues(NewBB, i, VRMap, InstrMap); 207*790a779fSJames Molloy LLVM_DEBUG({ 208*790a779fSJames Molloy dbgs() << "prolog:\n"; 209*790a779fSJames Molloy NewBB->dump(); 210*790a779fSJames Molloy }); 211*790a779fSJames Molloy } 212*790a779fSJames Molloy 213*790a779fSJames Molloy PredBB->replaceSuccessor(BB, KernelBB); 214*790a779fSJames Molloy 215*790a779fSJames Molloy // Check if we need to remove the branch from the preheader to the original 216*790a779fSJames Molloy // loop, and replace it with a branch to the new loop. 217*790a779fSJames Molloy unsigned numBranches = TII->removeBranch(*Preheader); 218*790a779fSJames Molloy if (numBranches) { 219*790a779fSJames Molloy SmallVector<MachineOperand, 0> Cond; 220*790a779fSJames Molloy TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc()); 221*790a779fSJames Molloy } 222*790a779fSJames Molloy } 223*790a779fSJames Molloy 224*790a779fSJames Molloy /// Generate the pipeline epilog code. The epilog code finishes the iterations 225*790a779fSJames Molloy /// that were started in either the prolog or the kernel. We create a basic 226*790a779fSJames Molloy /// block for each stage that needs to complete. 227*790a779fSJames Molloy void ModuloScheduleExpander::generateEpilog(unsigned LastStage, 228*790a779fSJames Molloy MachineBasicBlock *KernelBB, 229*790a779fSJames Molloy ValueMapTy *VRMap, 230*790a779fSJames Molloy MBBVectorTy &EpilogBBs, 231*790a779fSJames Molloy MBBVectorTy &PrologBBs) { 232*790a779fSJames Molloy // We need to change the branch from the kernel to the first epilog block, so 233*790a779fSJames Molloy // this call to analyze branch uses the kernel rather than the original BB. 234*790a779fSJames Molloy MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 235*790a779fSJames Molloy SmallVector<MachineOperand, 4> Cond; 236*790a779fSJames Molloy bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); 237*790a779fSJames Molloy assert(!checkBranch && "generateEpilog must be able to analyze the branch"); 238*790a779fSJames Molloy if (checkBranch) 239*790a779fSJames Molloy return; 240*790a779fSJames Molloy 241*790a779fSJames Molloy MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); 242*790a779fSJames Molloy if (*LoopExitI == KernelBB) 243*790a779fSJames Molloy ++LoopExitI; 244*790a779fSJames Molloy assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); 245*790a779fSJames Molloy MachineBasicBlock *LoopExitBB = *LoopExitI; 246*790a779fSJames Molloy 247*790a779fSJames Molloy MachineBasicBlock *PredBB = KernelBB; 248*790a779fSJames Molloy MachineBasicBlock *EpilogStart = LoopExitBB; 249*790a779fSJames Molloy InstrMapTy InstrMap; 250*790a779fSJames Molloy 251*790a779fSJames Molloy // Generate a basic block for each stage, not including the last stage, 252*790a779fSJames Molloy // which was generated for the kernel. Each basic block may contain 253*790a779fSJames Molloy // instructions from multiple stages/iterations. 254*790a779fSJames Molloy int EpilogStage = LastStage + 1; 255*790a779fSJames Molloy for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { 256*790a779fSJames Molloy MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); 257*790a779fSJames Molloy EpilogBBs.push_back(NewBB); 258*790a779fSJames Molloy MF.insert(BB->getIterator(), NewBB); 259*790a779fSJames Molloy 260*790a779fSJames Molloy PredBB->replaceSuccessor(LoopExitBB, NewBB); 261*790a779fSJames Molloy NewBB->addSuccessor(LoopExitBB); 262*790a779fSJames Molloy 263*790a779fSJames Molloy if (EpilogStart == LoopExitBB) 264*790a779fSJames Molloy EpilogStart = NewBB; 265*790a779fSJames Molloy 266*790a779fSJames Molloy // Add instructions to the epilog depending on the current block. 267*790a779fSJames Molloy // Process instructions in original program order. 268*790a779fSJames Molloy for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { 269*790a779fSJames Molloy for (auto &BBI : *BB) { 270*790a779fSJames Molloy if (BBI.isPHI()) 271*790a779fSJames Molloy continue; 272*790a779fSJames Molloy MachineInstr *In = &BBI; 273*790a779fSJames Molloy if ((unsigned)Schedule.getStage(In) == StageNum) { 274*790a779fSJames Molloy // Instructions with memoperands in the epilog are updated with 275*790a779fSJames Molloy // conservative values. 276*790a779fSJames Molloy MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); 277*790a779fSJames Molloy updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap); 278*790a779fSJames Molloy NewBB->push_back(NewMI); 279*790a779fSJames Molloy InstrMap[NewMI] = In; 280*790a779fSJames Molloy } 281*790a779fSJames Molloy } 282*790a779fSJames Molloy } 283*790a779fSJames Molloy generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, 284*790a779fSJames Molloy InstrMap, LastStage, EpilogStage, i == 1); 285*790a779fSJames Molloy generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, 286*790a779fSJames Molloy LastStage, EpilogStage, i == 1); 287*790a779fSJames Molloy PredBB = NewBB; 288*790a779fSJames Molloy 289*790a779fSJames Molloy LLVM_DEBUG({ 290*790a779fSJames Molloy dbgs() << "epilog:\n"; 291*790a779fSJames Molloy NewBB->dump(); 292*790a779fSJames Molloy }); 293*790a779fSJames Molloy } 294*790a779fSJames Molloy 295*790a779fSJames Molloy // Fix any Phi nodes in the loop exit block. 296*790a779fSJames Molloy LoopExitBB->replacePhiUsesWith(BB, PredBB); 297*790a779fSJames Molloy 298*790a779fSJames Molloy // Create a branch to the new epilog from the kernel. 299*790a779fSJames Molloy // Remove the original branch and add a new branch to the epilog. 300*790a779fSJames Molloy TII->removeBranch(*KernelBB); 301*790a779fSJames Molloy TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); 302*790a779fSJames Molloy // Add a branch to the loop exit. 303*790a779fSJames Molloy if (EpilogBBs.size() > 0) { 304*790a779fSJames Molloy MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); 305*790a779fSJames Molloy SmallVector<MachineOperand, 4> Cond1; 306*790a779fSJames Molloy TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); 307*790a779fSJames Molloy } 308*790a779fSJames Molloy } 309*790a779fSJames Molloy 310*790a779fSJames Molloy /// Replace all uses of FromReg that appear outside the specified 311*790a779fSJames Molloy /// basic block with ToReg. 312*790a779fSJames Molloy static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, 313*790a779fSJames Molloy MachineBasicBlock *MBB, 314*790a779fSJames Molloy MachineRegisterInfo &MRI, 315*790a779fSJames Molloy LiveIntervals &LIS) { 316*790a779fSJames Molloy for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), 317*790a779fSJames Molloy E = MRI.use_end(); 318*790a779fSJames Molloy I != E;) { 319*790a779fSJames Molloy MachineOperand &O = *I; 320*790a779fSJames Molloy ++I; 321*790a779fSJames Molloy if (O.getParent()->getParent() != MBB) 322*790a779fSJames Molloy O.setReg(ToReg); 323*790a779fSJames Molloy } 324*790a779fSJames Molloy if (!LIS.hasInterval(ToReg)) 325*790a779fSJames Molloy LIS.createEmptyInterval(ToReg); 326*790a779fSJames Molloy } 327*790a779fSJames Molloy 328*790a779fSJames Molloy /// Return true if the register has a use that occurs outside the 329*790a779fSJames Molloy /// specified loop. 330*790a779fSJames Molloy static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, 331*790a779fSJames Molloy MachineRegisterInfo &MRI) { 332*790a779fSJames Molloy for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), 333*790a779fSJames Molloy E = MRI.use_end(); 334*790a779fSJames Molloy I != E; ++I) 335*790a779fSJames Molloy if (I->getParent()->getParent() != BB) 336*790a779fSJames Molloy return true; 337*790a779fSJames Molloy return false; 338*790a779fSJames Molloy } 339*790a779fSJames Molloy 340*790a779fSJames Molloy /// Generate Phis for the specific block in the generated pipelined code. 341*790a779fSJames Molloy /// This function looks at the Phis from the original code to guide the 342*790a779fSJames Molloy /// creation of new Phis. 343*790a779fSJames Molloy void ModuloScheduleExpander::generateExistingPhis( 344*790a779fSJames Molloy MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, 345*790a779fSJames Molloy MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, 346*790a779fSJames Molloy unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { 347*790a779fSJames Molloy // Compute the stage number for the initial value of the Phi, which 348*790a779fSJames Molloy // comes from the prolog. The prolog to use depends on to which kernel/ 349*790a779fSJames Molloy // epilog that we're adding the Phi. 350*790a779fSJames Molloy unsigned PrologStage = 0; 351*790a779fSJames Molloy unsigned PrevStage = 0; 352*790a779fSJames Molloy bool InKernel = (LastStageNum == CurStageNum); 353*790a779fSJames Molloy if (InKernel) { 354*790a779fSJames Molloy PrologStage = LastStageNum - 1; 355*790a779fSJames Molloy PrevStage = CurStageNum; 356*790a779fSJames Molloy } else { 357*790a779fSJames Molloy PrologStage = LastStageNum - (CurStageNum - LastStageNum); 358*790a779fSJames Molloy PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; 359*790a779fSJames Molloy } 360*790a779fSJames Molloy 361*790a779fSJames Molloy for (MachineBasicBlock::iterator BBI = BB->instr_begin(), 362*790a779fSJames Molloy BBE = BB->getFirstNonPHI(); 363*790a779fSJames Molloy BBI != BBE; ++BBI) { 364*790a779fSJames Molloy Register Def = BBI->getOperand(0).getReg(); 365*790a779fSJames Molloy 366*790a779fSJames Molloy unsigned InitVal = 0; 367*790a779fSJames Molloy unsigned LoopVal = 0; 368*790a779fSJames Molloy getPhiRegs(*BBI, BB, InitVal, LoopVal); 369*790a779fSJames Molloy 370*790a779fSJames Molloy unsigned PhiOp1 = 0; 371*790a779fSJames Molloy // The Phi value from the loop body typically is defined in the loop, but 372*790a779fSJames Molloy // not always. So, we need to check if the value is defined in the loop. 373*790a779fSJames Molloy unsigned PhiOp2 = LoopVal; 374*790a779fSJames Molloy if (VRMap[LastStageNum].count(LoopVal)) 375*790a779fSJames Molloy PhiOp2 = VRMap[LastStageNum][LoopVal]; 376*790a779fSJames Molloy 377*790a779fSJames Molloy int StageScheduled = Schedule.getStage(&*BBI); 378*790a779fSJames Molloy int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal)); 379*790a779fSJames Molloy unsigned NumStages = getStagesForReg(Def, CurStageNum); 380*790a779fSJames Molloy if (NumStages == 0) { 381*790a779fSJames Molloy // We don't need to generate a Phi anymore, but we need to rename any uses 382*790a779fSJames Molloy // of the Phi value. 383*790a779fSJames Molloy unsigned NewReg = VRMap[PrevStage][LoopVal]; 384*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def, 385*790a779fSJames Molloy InitVal, NewReg); 386*790a779fSJames Molloy if (VRMap[CurStageNum].count(LoopVal)) 387*790a779fSJames Molloy VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; 388*790a779fSJames Molloy } 389*790a779fSJames Molloy // Adjust the number of Phis needed depending on the number of prologs left, 390*790a779fSJames Molloy // and the distance from where the Phi is first scheduled. The number of 391*790a779fSJames Molloy // Phis cannot exceed the number of prolog stages. Each stage can 392*790a779fSJames Molloy // potentially define two values. 393*790a779fSJames Molloy unsigned MaxPhis = PrologStage + 2; 394*790a779fSJames Molloy if (!InKernel && (int)PrologStage <= LoopValStage) 395*790a779fSJames Molloy MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); 396*790a779fSJames Molloy unsigned NumPhis = std::min(NumStages, MaxPhis); 397*790a779fSJames Molloy 398*790a779fSJames Molloy unsigned NewReg = 0; 399*790a779fSJames Molloy unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; 400*790a779fSJames Molloy // In the epilog, we may need to look back one stage to get the correct 401*790a779fSJames Molloy // Phi name because the epilog and prolog blocks execute the same stage. 402*790a779fSJames Molloy // The correct name is from the previous block only when the Phi has 403*790a779fSJames Molloy // been completely scheduled prior to the epilog, and Phi value is not 404*790a779fSJames Molloy // needed in multiple stages. 405*790a779fSJames Molloy int StageDiff = 0; 406*790a779fSJames Molloy if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && 407*790a779fSJames Molloy NumPhis == 1) 408*790a779fSJames Molloy StageDiff = 1; 409*790a779fSJames Molloy // Adjust the computations below when the phi and the loop definition 410*790a779fSJames Molloy // are scheduled in different stages. 411*790a779fSJames Molloy if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) 412*790a779fSJames Molloy StageDiff = StageScheduled - LoopValStage; 413*790a779fSJames Molloy for (unsigned np = 0; np < NumPhis; ++np) { 414*790a779fSJames Molloy // If the Phi hasn't been scheduled, then use the initial Phi operand 415*790a779fSJames Molloy // value. Otherwise, use the scheduled version of the instruction. This 416*790a779fSJames Molloy // is a little complicated when a Phi references another Phi. 417*790a779fSJames Molloy if (np > PrologStage || StageScheduled >= (int)LastStageNum) 418*790a779fSJames Molloy PhiOp1 = InitVal; 419*790a779fSJames Molloy // Check if the Phi has already been scheduled in a prolog stage. 420*790a779fSJames Molloy else if (PrologStage >= AccessStage + StageDiff + np && 421*790a779fSJames Molloy VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) 422*790a779fSJames Molloy PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; 423*790a779fSJames Molloy // Check if the Phi has already been scheduled, but the loop instruction 424*790a779fSJames Molloy // is either another Phi, or doesn't occur in the loop. 425*790a779fSJames Molloy else if (PrologStage >= AccessStage + StageDiff + np) { 426*790a779fSJames Molloy // If the Phi references another Phi, we need to examine the other 427*790a779fSJames Molloy // Phi to get the correct value. 428*790a779fSJames Molloy PhiOp1 = LoopVal; 429*790a779fSJames Molloy MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); 430*790a779fSJames Molloy int Indirects = 1; 431*790a779fSJames Molloy while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { 432*790a779fSJames Molloy int PhiStage = Schedule.getStage(InstOp1); 433*790a779fSJames Molloy if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) 434*790a779fSJames Molloy PhiOp1 = getInitPhiReg(*InstOp1, BB); 435*790a779fSJames Molloy else 436*790a779fSJames Molloy PhiOp1 = getLoopPhiReg(*InstOp1, BB); 437*790a779fSJames Molloy InstOp1 = MRI.getVRegDef(PhiOp1); 438*790a779fSJames Molloy int PhiOpStage = Schedule.getStage(InstOp1); 439*790a779fSJames Molloy int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); 440*790a779fSJames Molloy if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && 441*790a779fSJames Molloy VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { 442*790a779fSJames Molloy PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; 443*790a779fSJames Molloy break; 444*790a779fSJames Molloy } 445*790a779fSJames Molloy ++Indirects; 446*790a779fSJames Molloy } 447*790a779fSJames Molloy } else 448*790a779fSJames Molloy PhiOp1 = InitVal; 449*790a779fSJames Molloy // If this references a generated Phi in the kernel, get the Phi operand 450*790a779fSJames Molloy // from the incoming block. 451*790a779fSJames Molloy if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) 452*790a779fSJames Molloy if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) 453*790a779fSJames Molloy PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); 454*790a779fSJames Molloy 455*790a779fSJames Molloy MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); 456*790a779fSJames Molloy bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); 457*790a779fSJames Molloy // In the epilog, a map lookup is needed to get the value from the kernel, 458*790a779fSJames Molloy // or previous epilog block. How is does this depends on if the 459*790a779fSJames Molloy // instruction is scheduled in the previous block. 460*790a779fSJames Molloy if (!InKernel) { 461*790a779fSJames Molloy int StageDiffAdj = 0; 462*790a779fSJames Molloy if (LoopValStage != -1 && StageScheduled > LoopValStage) 463*790a779fSJames Molloy StageDiffAdj = StageScheduled - LoopValStage; 464*790a779fSJames Molloy // Use the loop value defined in the kernel, unless the kernel 465*790a779fSJames Molloy // contains the last definition of the Phi. 466*790a779fSJames Molloy if (np == 0 && PrevStage == LastStageNum && 467*790a779fSJames Molloy (StageScheduled != 0 || LoopValStage != 0) && 468*790a779fSJames Molloy VRMap[PrevStage - StageDiffAdj].count(LoopVal)) 469*790a779fSJames Molloy PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; 470*790a779fSJames Molloy // Use the value defined by the Phi. We add one because we switch 471*790a779fSJames Molloy // from looking at the loop value to the Phi definition. 472*790a779fSJames Molloy else if (np > 0 && PrevStage == LastStageNum && 473*790a779fSJames Molloy VRMap[PrevStage - np + 1].count(Def)) 474*790a779fSJames Molloy PhiOp2 = VRMap[PrevStage - np + 1][Def]; 475*790a779fSJames Molloy // Use the loop value defined in the kernel. 476*790a779fSJames Molloy else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 && 477*790a779fSJames Molloy VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) 478*790a779fSJames Molloy PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; 479*790a779fSJames Molloy // Use the value defined by the Phi, unless we're generating the first 480*790a779fSJames Molloy // epilog and the Phi refers to a Phi in a different stage. 481*790a779fSJames Molloy else if (VRMap[PrevStage - np].count(Def) && 482*790a779fSJames Molloy (!LoopDefIsPhi || (PrevStage != LastStageNum) || 483*790a779fSJames Molloy (LoopValStage == StageScheduled))) 484*790a779fSJames Molloy PhiOp2 = VRMap[PrevStage - np][Def]; 485*790a779fSJames Molloy } 486*790a779fSJames Molloy 487*790a779fSJames Molloy // Check if we can reuse an existing Phi. This occurs when a Phi 488*790a779fSJames Molloy // references another Phi, and the other Phi is scheduled in an 489*790a779fSJames Molloy // earlier stage. We can try to reuse an existing Phi up until the last 490*790a779fSJames Molloy // stage of the current Phi. 491*790a779fSJames Molloy if (LoopDefIsPhi) { 492*790a779fSJames Molloy if (static_cast<int>(PrologStage - np) >= StageScheduled) { 493*790a779fSJames Molloy int LVNumStages = getStagesForPhi(LoopVal); 494*790a779fSJames Molloy int StageDiff = (StageScheduled - LoopValStage); 495*790a779fSJames Molloy LVNumStages -= StageDiff; 496*790a779fSJames Molloy // Make sure the loop value Phi has been processed already. 497*790a779fSJames Molloy if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { 498*790a779fSJames Molloy NewReg = PhiOp2; 499*790a779fSJames Molloy unsigned ReuseStage = CurStageNum; 500*790a779fSJames Molloy if (isLoopCarried(*PhiInst)) 501*790a779fSJames Molloy ReuseStage -= LVNumStages; 502*790a779fSJames Molloy // Check if the Phi to reuse has been generated yet. If not, then 503*790a779fSJames Molloy // there is nothing to reuse. 504*790a779fSJames Molloy if (VRMap[ReuseStage - np].count(LoopVal)) { 505*790a779fSJames Molloy NewReg = VRMap[ReuseStage - np][LoopVal]; 506*790a779fSJames Molloy 507*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, 508*790a779fSJames Molloy Def, NewReg); 509*790a779fSJames Molloy // Update the map with the new Phi name. 510*790a779fSJames Molloy VRMap[CurStageNum - np][Def] = NewReg; 511*790a779fSJames Molloy PhiOp2 = NewReg; 512*790a779fSJames Molloy if (VRMap[LastStageNum - np - 1].count(LoopVal)) 513*790a779fSJames Molloy PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; 514*790a779fSJames Molloy 515*790a779fSJames Molloy if (IsLast && np == NumPhis - 1) 516*790a779fSJames Molloy replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); 517*790a779fSJames Molloy continue; 518*790a779fSJames Molloy } 519*790a779fSJames Molloy } 520*790a779fSJames Molloy } 521*790a779fSJames Molloy if (InKernel && StageDiff > 0 && 522*790a779fSJames Molloy VRMap[CurStageNum - StageDiff - np].count(LoopVal)) 523*790a779fSJames Molloy PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; 524*790a779fSJames Molloy } 525*790a779fSJames Molloy 526*790a779fSJames Molloy const TargetRegisterClass *RC = MRI.getRegClass(Def); 527*790a779fSJames Molloy NewReg = MRI.createVirtualRegister(RC); 528*790a779fSJames Molloy 529*790a779fSJames Molloy MachineInstrBuilder NewPhi = 530*790a779fSJames Molloy BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), 531*790a779fSJames Molloy TII->get(TargetOpcode::PHI), NewReg); 532*790a779fSJames Molloy NewPhi.addReg(PhiOp1).addMBB(BB1); 533*790a779fSJames Molloy NewPhi.addReg(PhiOp2).addMBB(BB2); 534*790a779fSJames Molloy if (np == 0) 535*790a779fSJames Molloy InstrMap[NewPhi] = &*BBI; 536*790a779fSJames Molloy 537*790a779fSJames Molloy // We define the Phis after creating the new pipelined code, so 538*790a779fSJames Molloy // we need to rename the Phi values in scheduled instructions. 539*790a779fSJames Molloy 540*790a779fSJames Molloy unsigned PrevReg = 0; 541*790a779fSJames Molloy if (InKernel && VRMap[PrevStage - np].count(LoopVal)) 542*790a779fSJames Molloy PrevReg = VRMap[PrevStage - np][LoopVal]; 543*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, 544*790a779fSJames Molloy NewReg, PrevReg); 545*790a779fSJames Molloy // If the Phi has been scheduled, use the new name for rewriting. 546*790a779fSJames Molloy if (VRMap[CurStageNum - np].count(Def)) { 547*790a779fSJames Molloy unsigned R = VRMap[CurStageNum - np][Def]; 548*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R, 549*790a779fSJames Molloy NewReg); 550*790a779fSJames Molloy } 551*790a779fSJames Molloy 552*790a779fSJames Molloy // Check if we need to rename any uses that occurs after the loop. The 553*790a779fSJames Molloy // register to replace depends on whether the Phi is scheduled in the 554*790a779fSJames Molloy // epilog. 555*790a779fSJames Molloy if (IsLast && np == NumPhis - 1) 556*790a779fSJames Molloy replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); 557*790a779fSJames Molloy 558*790a779fSJames Molloy // In the kernel, a dependent Phi uses the value from this Phi. 559*790a779fSJames Molloy if (InKernel) 560*790a779fSJames Molloy PhiOp2 = NewReg; 561*790a779fSJames Molloy 562*790a779fSJames Molloy // Update the map with the new Phi name. 563*790a779fSJames Molloy VRMap[CurStageNum - np][Def] = NewReg; 564*790a779fSJames Molloy } 565*790a779fSJames Molloy 566*790a779fSJames Molloy while (NumPhis++ < NumStages) { 567*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def, 568*790a779fSJames Molloy NewReg, 0); 569*790a779fSJames Molloy } 570*790a779fSJames Molloy 571*790a779fSJames Molloy // Check if we need to rename a Phi that has been eliminated due to 572*790a779fSJames Molloy // scheduling. 573*790a779fSJames Molloy if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) 574*790a779fSJames Molloy replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); 575*790a779fSJames Molloy } 576*790a779fSJames Molloy } 577*790a779fSJames Molloy 578*790a779fSJames Molloy /// Generate Phis for the specified block in the generated pipelined code. 579*790a779fSJames Molloy /// These are new Phis needed because the definition is scheduled after the 580*790a779fSJames Molloy /// use in the pipelined sequence. 581*790a779fSJames Molloy void ModuloScheduleExpander::generatePhis( 582*790a779fSJames Molloy MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, 583*790a779fSJames Molloy MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, 584*790a779fSJames Molloy unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { 585*790a779fSJames Molloy // Compute the stage number that contains the initial Phi value, and 586*790a779fSJames Molloy // the Phi from the previous stage. 587*790a779fSJames Molloy unsigned PrologStage = 0; 588*790a779fSJames Molloy unsigned PrevStage = 0; 589*790a779fSJames Molloy unsigned StageDiff = CurStageNum - LastStageNum; 590*790a779fSJames Molloy bool InKernel = (StageDiff == 0); 591*790a779fSJames Molloy if (InKernel) { 592*790a779fSJames Molloy PrologStage = LastStageNum - 1; 593*790a779fSJames Molloy PrevStage = CurStageNum; 594*790a779fSJames Molloy } else { 595*790a779fSJames Molloy PrologStage = LastStageNum - StageDiff; 596*790a779fSJames Molloy PrevStage = LastStageNum + StageDiff - 1; 597*790a779fSJames Molloy } 598*790a779fSJames Molloy 599*790a779fSJames Molloy for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), 600*790a779fSJames Molloy BBE = BB->instr_end(); 601*790a779fSJames Molloy BBI != BBE; ++BBI) { 602*790a779fSJames Molloy for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { 603*790a779fSJames Molloy MachineOperand &MO = BBI->getOperand(i); 604*790a779fSJames Molloy if (!MO.isReg() || !MO.isDef() || 605*790a779fSJames Molloy !Register::isVirtualRegister(MO.getReg())) 606*790a779fSJames Molloy continue; 607*790a779fSJames Molloy 608*790a779fSJames Molloy int StageScheduled = Schedule.getStage(&*BBI); 609*790a779fSJames Molloy assert(StageScheduled != -1 && "Expecting scheduled instruction."); 610*790a779fSJames Molloy Register Def = MO.getReg(); 611*790a779fSJames Molloy unsigned NumPhis = getStagesForReg(Def, CurStageNum); 612*790a779fSJames Molloy // An instruction scheduled in stage 0 and is used after the loop 613*790a779fSJames Molloy // requires a phi in the epilog for the last definition from either 614*790a779fSJames Molloy // the kernel or prolog. 615*790a779fSJames Molloy if (!InKernel && NumPhis == 0 && StageScheduled == 0 && 616*790a779fSJames Molloy hasUseAfterLoop(Def, BB, MRI)) 617*790a779fSJames Molloy NumPhis = 1; 618*790a779fSJames Molloy if (!InKernel && (unsigned)StageScheduled > PrologStage) 619*790a779fSJames Molloy continue; 620*790a779fSJames Molloy 621*790a779fSJames Molloy unsigned PhiOp2 = VRMap[PrevStage][Def]; 622*790a779fSJames Molloy if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) 623*790a779fSJames Molloy if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) 624*790a779fSJames Molloy PhiOp2 = getLoopPhiReg(*InstOp2, BB2); 625*790a779fSJames Molloy // The number of Phis can't exceed the number of prolog stages. The 626*790a779fSJames Molloy // prolog stage number is zero based. 627*790a779fSJames Molloy if (NumPhis > PrologStage + 1 - StageScheduled) 628*790a779fSJames Molloy NumPhis = PrologStage + 1 - StageScheduled; 629*790a779fSJames Molloy for (unsigned np = 0; np < NumPhis; ++np) { 630*790a779fSJames Molloy unsigned PhiOp1 = VRMap[PrologStage][Def]; 631*790a779fSJames Molloy if (np <= PrologStage) 632*790a779fSJames Molloy PhiOp1 = VRMap[PrologStage - np][Def]; 633*790a779fSJames Molloy if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { 634*790a779fSJames Molloy if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) 635*790a779fSJames Molloy PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); 636*790a779fSJames Molloy if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) 637*790a779fSJames Molloy PhiOp1 = getInitPhiReg(*InstOp1, NewBB); 638*790a779fSJames Molloy } 639*790a779fSJames Molloy if (!InKernel) 640*790a779fSJames Molloy PhiOp2 = VRMap[PrevStage - np][Def]; 641*790a779fSJames Molloy 642*790a779fSJames Molloy const TargetRegisterClass *RC = MRI.getRegClass(Def); 643*790a779fSJames Molloy Register NewReg = MRI.createVirtualRegister(RC); 644*790a779fSJames Molloy 645*790a779fSJames Molloy MachineInstrBuilder NewPhi = 646*790a779fSJames Molloy BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), 647*790a779fSJames Molloy TII->get(TargetOpcode::PHI), NewReg); 648*790a779fSJames Molloy NewPhi.addReg(PhiOp1).addMBB(BB1); 649*790a779fSJames Molloy NewPhi.addReg(PhiOp2).addMBB(BB2); 650*790a779fSJames Molloy if (np == 0) 651*790a779fSJames Molloy InstrMap[NewPhi] = &*BBI; 652*790a779fSJames Molloy 653*790a779fSJames Molloy // Rewrite uses and update the map. The actions depend upon whether 654*790a779fSJames Molloy // we generating code for the kernel or epilog blocks. 655*790a779fSJames Molloy if (InKernel) { 656*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1, 657*790a779fSJames Molloy NewReg); 658*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2, 659*790a779fSJames Molloy NewReg); 660*790a779fSJames Molloy 661*790a779fSJames Molloy PhiOp2 = NewReg; 662*790a779fSJames Molloy VRMap[PrevStage - np - 1][Def] = NewReg; 663*790a779fSJames Molloy } else { 664*790a779fSJames Molloy VRMap[CurStageNum - np][Def] = NewReg; 665*790a779fSJames Molloy if (np == NumPhis - 1) 666*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, 667*790a779fSJames Molloy NewReg); 668*790a779fSJames Molloy } 669*790a779fSJames Molloy if (IsLast && np == NumPhis - 1) 670*790a779fSJames Molloy replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); 671*790a779fSJames Molloy } 672*790a779fSJames Molloy } 673*790a779fSJames Molloy } 674*790a779fSJames Molloy } 675*790a779fSJames Molloy 676*790a779fSJames Molloy /// Remove instructions that generate values with no uses. 677*790a779fSJames Molloy /// Typically, these are induction variable operations that generate values 678*790a779fSJames Molloy /// used in the loop itself. A dead instruction has a definition with 679*790a779fSJames Molloy /// no uses, or uses that occur in the original loop only. 680*790a779fSJames Molloy void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, 681*790a779fSJames Molloy MBBVectorTy &EpilogBBs) { 682*790a779fSJames Molloy // For each epilog block, check that the value defined by each instruction 683*790a779fSJames Molloy // is used. If not, delete it. 684*790a779fSJames Molloy for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), 685*790a779fSJames Molloy MBE = EpilogBBs.rend(); 686*790a779fSJames Molloy MBB != MBE; ++MBB) 687*790a779fSJames Molloy for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), 688*790a779fSJames Molloy ME = (*MBB)->instr_rend(); 689*790a779fSJames Molloy MI != ME;) { 690*790a779fSJames Molloy // From DeadMachineInstructionElem. Don't delete inline assembly. 691*790a779fSJames Molloy if (MI->isInlineAsm()) { 692*790a779fSJames Molloy ++MI; 693*790a779fSJames Molloy continue; 694*790a779fSJames Molloy } 695*790a779fSJames Molloy bool SawStore = false; 696*790a779fSJames Molloy // Check if it's safe to remove the instruction due to side effects. 697*790a779fSJames Molloy // We can, and want to, remove Phis here. 698*790a779fSJames Molloy if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { 699*790a779fSJames Molloy ++MI; 700*790a779fSJames Molloy continue; 701*790a779fSJames Molloy } 702*790a779fSJames Molloy bool used = true; 703*790a779fSJames Molloy for (MachineInstr::mop_iterator MOI = MI->operands_begin(), 704*790a779fSJames Molloy MOE = MI->operands_end(); 705*790a779fSJames Molloy MOI != MOE; ++MOI) { 706*790a779fSJames Molloy if (!MOI->isReg() || !MOI->isDef()) 707*790a779fSJames Molloy continue; 708*790a779fSJames Molloy Register reg = MOI->getReg(); 709*790a779fSJames Molloy // Assume physical registers are used, unless they are marked dead. 710*790a779fSJames Molloy if (Register::isPhysicalRegister(reg)) { 711*790a779fSJames Molloy used = !MOI->isDead(); 712*790a779fSJames Molloy if (used) 713*790a779fSJames Molloy break; 714*790a779fSJames Molloy continue; 715*790a779fSJames Molloy } 716*790a779fSJames Molloy unsigned realUses = 0; 717*790a779fSJames Molloy for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), 718*790a779fSJames Molloy EI = MRI.use_end(); 719*790a779fSJames Molloy UI != EI; ++UI) { 720*790a779fSJames Molloy // Check if there are any uses that occur only in the original 721*790a779fSJames Molloy // loop. If so, that's not a real use. 722*790a779fSJames Molloy if (UI->getParent()->getParent() != BB) { 723*790a779fSJames Molloy realUses++; 724*790a779fSJames Molloy used = true; 725*790a779fSJames Molloy break; 726*790a779fSJames Molloy } 727*790a779fSJames Molloy } 728*790a779fSJames Molloy if (realUses > 0) 729*790a779fSJames Molloy break; 730*790a779fSJames Molloy used = false; 731*790a779fSJames Molloy } 732*790a779fSJames Molloy if (!used) { 733*790a779fSJames Molloy LIS.RemoveMachineInstrFromMaps(*MI); 734*790a779fSJames Molloy MI++->eraseFromParent(); 735*790a779fSJames Molloy continue; 736*790a779fSJames Molloy } 737*790a779fSJames Molloy ++MI; 738*790a779fSJames Molloy } 739*790a779fSJames Molloy // In the kernel block, check if we can remove a Phi that generates a value 740*790a779fSJames Molloy // used in an instruction removed in the epilog block. 741*790a779fSJames Molloy for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), 742*790a779fSJames Molloy BBE = KernelBB->getFirstNonPHI(); 743*790a779fSJames Molloy BBI != BBE;) { 744*790a779fSJames Molloy MachineInstr *MI = &*BBI; 745*790a779fSJames Molloy ++BBI; 746*790a779fSJames Molloy Register reg = MI->getOperand(0).getReg(); 747*790a779fSJames Molloy if (MRI.use_begin(reg) == MRI.use_end()) { 748*790a779fSJames Molloy LIS.RemoveMachineInstrFromMaps(*MI); 749*790a779fSJames Molloy MI->eraseFromParent(); 750*790a779fSJames Molloy } 751*790a779fSJames Molloy } 752*790a779fSJames Molloy } 753*790a779fSJames Molloy 754*790a779fSJames Molloy /// For loop carried definitions, we split the lifetime of a virtual register 755*790a779fSJames Molloy /// that has uses past the definition in the next iteration. A copy with a new 756*790a779fSJames Molloy /// virtual register is inserted before the definition, which helps with 757*790a779fSJames Molloy /// generating a better register assignment. 758*790a779fSJames Molloy /// 759*790a779fSJames Molloy /// v1 = phi(a, v2) v1 = phi(a, v2) 760*790a779fSJames Molloy /// v2 = phi(b, v3) v2 = phi(b, v3) 761*790a779fSJames Molloy /// v3 = .. v4 = copy v1 762*790a779fSJames Molloy /// .. = V1 v3 = .. 763*790a779fSJames Molloy /// .. = v4 764*790a779fSJames Molloy void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, 765*790a779fSJames Molloy MBBVectorTy &EpilogBBs) { 766*790a779fSJames Molloy const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 767*790a779fSJames Molloy for (auto &PHI : KernelBB->phis()) { 768*790a779fSJames Molloy Register Def = PHI.getOperand(0).getReg(); 769*790a779fSJames Molloy // Check for any Phi definition that used as an operand of another Phi 770*790a779fSJames Molloy // in the same block. 771*790a779fSJames Molloy for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), 772*790a779fSJames Molloy E = MRI.use_instr_end(); 773*790a779fSJames Molloy I != E; ++I) { 774*790a779fSJames Molloy if (I->isPHI() && I->getParent() == KernelBB) { 775*790a779fSJames Molloy // Get the loop carried definition. 776*790a779fSJames Molloy unsigned LCDef = getLoopPhiReg(PHI, KernelBB); 777*790a779fSJames Molloy if (!LCDef) 778*790a779fSJames Molloy continue; 779*790a779fSJames Molloy MachineInstr *MI = MRI.getVRegDef(LCDef); 780*790a779fSJames Molloy if (!MI || MI->getParent() != KernelBB || MI->isPHI()) 781*790a779fSJames Molloy continue; 782*790a779fSJames Molloy // Search through the rest of the block looking for uses of the Phi 783*790a779fSJames Molloy // definition. If one occurs, then split the lifetime. 784*790a779fSJames Molloy unsigned SplitReg = 0; 785*790a779fSJames Molloy for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), 786*790a779fSJames Molloy KernelBB->instr_end())) 787*790a779fSJames Molloy if (BBJ.readsRegister(Def)) { 788*790a779fSJames Molloy // We split the lifetime when we find the first use. 789*790a779fSJames Molloy if (SplitReg == 0) { 790*790a779fSJames Molloy SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); 791*790a779fSJames Molloy BuildMI(*KernelBB, MI, MI->getDebugLoc(), 792*790a779fSJames Molloy TII->get(TargetOpcode::COPY), SplitReg) 793*790a779fSJames Molloy .addReg(Def); 794*790a779fSJames Molloy } 795*790a779fSJames Molloy BBJ.substituteRegister(Def, SplitReg, 0, *TRI); 796*790a779fSJames Molloy } 797*790a779fSJames Molloy if (!SplitReg) 798*790a779fSJames Molloy continue; 799*790a779fSJames Molloy // Search through each of the epilog blocks for any uses to be renamed. 800*790a779fSJames Molloy for (auto &Epilog : EpilogBBs) 801*790a779fSJames Molloy for (auto &I : *Epilog) 802*790a779fSJames Molloy if (I.readsRegister(Def)) 803*790a779fSJames Molloy I.substituteRegister(Def, SplitReg, 0, *TRI); 804*790a779fSJames Molloy break; 805*790a779fSJames Molloy } 806*790a779fSJames Molloy } 807*790a779fSJames Molloy } 808*790a779fSJames Molloy } 809*790a779fSJames Molloy 810*790a779fSJames Molloy /// Remove the incoming block from the Phis in a basic block. 811*790a779fSJames Molloy static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { 812*790a779fSJames Molloy for (MachineInstr &MI : *BB) { 813*790a779fSJames Molloy if (!MI.isPHI()) 814*790a779fSJames Molloy break; 815*790a779fSJames Molloy for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) 816*790a779fSJames Molloy if (MI.getOperand(i + 1).getMBB() == Incoming) { 817*790a779fSJames Molloy MI.RemoveOperand(i + 1); 818*790a779fSJames Molloy MI.RemoveOperand(i); 819*790a779fSJames Molloy break; 820*790a779fSJames Molloy } 821*790a779fSJames Molloy } 822*790a779fSJames Molloy } 823*790a779fSJames Molloy 824*790a779fSJames Molloy /// Create branches from each prolog basic block to the appropriate epilog 825*790a779fSJames Molloy /// block. These edges are needed if the loop ends before reaching the 826*790a779fSJames Molloy /// kernel. 827*790a779fSJames Molloy void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, 828*790a779fSJames Molloy MBBVectorTy &PrologBBs, 829*790a779fSJames Molloy MachineBasicBlock *KernelBB, 830*790a779fSJames Molloy MBBVectorTy &EpilogBBs, 831*790a779fSJames Molloy ValueMapTy *VRMap) { 832*790a779fSJames Molloy assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); 833*790a779fSJames Molloy MachineInstr *IndVar; 834*790a779fSJames Molloy MachineInstr *Cmp; 835*790a779fSJames Molloy if (TII->analyzeLoop(*Schedule.getLoop(), IndVar, Cmp)) 836*790a779fSJames Molloy llvm_unreachable("Must be able to analyze loop!"); 837*790a779fSJames Molloy MachineBasicBlock *LastPro = KernelBB; 838*790a779fSJames Molloy MachineBasicBlock *LastEpi = KernelBB; 839*790a779fSJames Molloy 840*790a779fSJames Molloy // Start from the blocks connected to the kernel and work "out" 841*790a779fSJames Molloy // to the first prolog and the last epilog blocks. 842*790a779fSJames Molloy SmallVector<MachineInstr *, 4> PrevInsts; 843*790a779fSJames Molloy unsigned MaxIter = PrologBBs.size() - 1; 844*790a779fSJames Molloy unsigned LC = UINT_MAX; 845*790a779fSJames Molloy unsigned LCMin = UINT_MAX; 846*790a779fSJames Molloy for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { 847*790a779fSJames Molloy // Add branches to the prolog that go to the corresponding 848*790a779fSJames Molloy // epilog, and the fall-thru prolog/kernel block. 849*790a779fSJames Molloy MachineBasicBlock *Prolog = PrologBBs[j]; 850*790a779fSJames Molloy MachineBasicBlock *Epilog = EpilogBBs[i]; 851*790a779fSJames Molloy // We've executed one iteration, so decrement the loop count and check for 852*790a779fSJames Molloy // the loop end. 853*790a779fSJames Molloy SmallVector<MachineOperand, 4> Cond; 854*790a779fSJames Molloy // Check if the LOOP0 has already been removed. If so, then there is no need 855*790a779fSJames Molloy // to reduce the trip count. 856*790a779fSJames Molloy if (LC != 0) 857*790a779fSJames Molloy LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, 858*790a779fSJames Molloy PrevInsts, j, MaxIter); 859*790a779fSJames Molloy 860*790a779fSJames Molloy // Record the value of the first trip count, which is used to determine if 861*790a779fSJames Molloy // branches and blocks can be removed for constant trip counts. 862*790a779fSJames Molloy if (LCMin == UINT_MAX) 863*790a779fSJames Molloy LCMin = LC; 864*790a779fSJames Molloy 865*790a779fSJames Molloy unsigned numAdded = 0; 866*790a779fSJames Molloy if (Register::isVirtualRegister(LC)) { 867*790a779fSJames Molloy Prolog->addSuccessor(Epilog); 868*790a779fSJames Molloy numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); 869*790a779fSJames Molloy } else if (j >= LCMin) { 870*790a779fSJames Molloy Prolog->addSuccessor(Epilog); 871*790a779fSJames Molloy Prolog->removeSuccessor(LastPro); 872*790a779fSJames Molloy LastEpi->removeSuccessor(Epilog); 873*790a779fSJames Molloy numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); 874*790a779fSJames Molloy removePhis(Epilog, LastEpi); 875*790a779fSJames Molloy // Remove the blocks that are no longer referenced. 876*790a779fSJames Molloy if (LastPro != LastEpi) { 877*790a779fSJames Molloy LastEpi->clear(); 878*790a779fSJames Molloy LastEpi->eraseFromParent(); 879*790a779fSJames Molloy } 880*790a779fSJames Molloy LastPro->clear(); 881*790a779fSJames Molloy LastPro->eraseFromParent(); 882*790a779fSJames Molloy } else { 883*790a779fSJames Molloy numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); 884*790a779fSJames Molloy removePhis(Epilog, Prolog); 885*790a779fSJames Molloy } 886*790a779fSJames Molloy LastPro = Prolog; 887*790a779fSJames Molloy LastEpi = Epilog; 888*790a779fSJames Molloy for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), 889*790a779fSJames Molloy E = Prolog->instr_rend(); 890*790a779fSJames Molloy I != E && numAdded > 0; ++I, --numAdded) 891*790a779fSJames Molloy updateInstruction(&*I, false, j, 0, VRMap); 892*790a779fSJames Molloy } 893*790a779fSJames Molloy } 894*790a779fSJames Molloy 895*790a779fSJames Molloy /// Return true if we can compute the amount the instruction changes 896*790a779fSJames Molloy /// during each iteration. Set Delta to the amount of the change. 897*790a779fSJames Molloy bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { 898*790a779fSJames Molloy const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 899*790a779fSJames Molloy const MachineOperand *BaseOp; 900*790a779fSJames Molloy int64_t Offset; 901*790a779fSJames Molloy if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) 902*790a779fSJames Molloy return false; 903*790a779fSJames Molloy 904*790a779fSJames Molloy if (!BaseOp->isReg()) 905*790a779fSJames Molloy return false; 906*790a779fSJames Molloy 907*790a779fSJames Molloy Register BaseReg = BaseOp->getReg(); 908*790a779fSJames Molloy 909*790a779fSJames Molloy MachineRegisterInfo &MRI = MF.getRegInfo(); 910*790a779fSJames Molloy // Check if there is a Phi. If so, get the definition in the loop. 911*790a779fSJames Molloy MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); 912*790a779fSJames Molloy if (BaseDef && BaseDef->isPHI()) { 913*790a779fSJames Molloy BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); 914*790a779fSJames Molloy BaseDef = MRI.getVRegDef(BaseReg); 915*790a779fSJames Molloy } 916*790a779fSJames Molloy if (!BaseDef) 917*790a779fSJames Molloy return false; 918*790a779fSJames Molloy 919*790a779fSJames Molloy int D = 0; 920*790a779fSJames Molloy if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) 921*790a779fSJames Molloy return false; 922*790a779fSJames Molloy 923*790a779fSJames Molloy Delta = D; 924*790a779fSJames Molloy return true; 925*790a779fSJames Molloy } 926*790a779fSJames Molloy 927*790a779fSJames Molloy /// Update the memory operand with a new offset when the pipeliner 928*790a779fSJames Molloy /// generates a new copy of the instruction that refers to a 929*790a779fSJames Molloy /// different memory location. 930*790a779fSJames Molloy void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI, 931*790a779fSJames Molloy MachineInstr &OldMI, 932*790a779fSJames Molloy unsigned Num) { 933*790a779fSJames Molloy if (Num == 0) 934*790a779fSJames Molloy return; 935*790a779fSJames Molloy // If the instruction has memory operands, then adjust the offset 936*790a779fSJames Molloy // when the instruction appears in different stages. 937*790a779fSJames Molloy if (NewMI.memoperands_empty()) 938*790a779fSJames Molloy return; 939*790a779fSJames Molloy SmallVector<MachineMemOperand *, 2> NewMMOs; 940*790a779fSJames Molloy for (MachineMemOperand *MMO : NewMI.memoperands()) { 941*790a779fSJames Molloy // TODO: Figure out whether isAtomic is really necessary (see D57601). 942*790a779fSJames Molloy if (MMO->isVolatile() || MMO->isAtomic() || 943*790a779fSJames Molloy (MMO->isInvariant() && MMO->isDereferenceable()) || 944*790a779fSJames Molloy (!MMO->getValue())) { 945*790a779fSJames Molloy NewMMOs.push_back(MMO); 946*790a779fSJames Molloy continue; 947*790a779fSJames Molloy } 948*790a779fSJames Molloy unsigned Delta; 949*790a779fSJames Molloy if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { 950*790a779fSJames Molloy int64_t AdjOffset = Delta * Num; 951*790a779fSJames Molloy NewMMOs.push_back( 952*790a779fSJames Molloy MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); 953*790a779fSJames Molloy } else { 954*790a779fSJames Molloy NewMMOs.push_back( 955*790a779fSJames Molloy MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); 956*790a779fSJames Molloy } 957*790a779fSJames Molloy } 958*790a779fSJames Molloy NewMI.setMemRefs(MF, NewMMOs); 959*790a779fSJames Molloy } 960*790a779fSJames Molloy 961*790a779fSJames Molloy /// Clone the instruction for the new pipelined loop and update the 962*790a779fSJames Molloy /// memory operands, if needed. 963*790a779fSJames Molloy MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, 964*790a779fSJames Molloy unsigned CurStageNum, 965*790a779fSJames Molloy unsigned InstStageNum) { 966*790a779fSJames Molloy MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); 967*790a779fSJames Molloy // Check for tied operands in inline asm instructions. This should be handled 968*790a779fSJames Molloy // elsewhere, but I'm not sure of the best solution. 969*790a779fSJames Molloy if (OldMI->isInlineAsm()) 970*790a779fSJames Molloy for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { 971*790a779fSJames Molloy const auto &MO = OldMI->getOperand(i); 972*790a779fSJames Molloy if (MO.isReg() && MO.isUse()) 973*790a779fSJames Molloy break; 974*790a779fSJames Molloy unsigned UseIdx; 975*790a779fSJames Molloy if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) 976*790a779fSJames Molloy NewMI->tieOperands(i, UseIdx); 977*790a779fSJames Molloy } 978*790a779fSJames Molloy updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); 979*790a779fSJames Molloy return NewMI; 980*790a779fSJames Molloy } 981*790a779fSJames Molloy 982*790a779fSJames Molloy /// Clone the instruction for the new pipelined loop. If needed, this 983*790a779fSJames Molloy /// function updates the instruction using the values saved in the 984*790a779fSJames Molloy /// InstrChanges structure. 985*790a779fSJames Molloy MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr( 986*790a779fSJames Molloy MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) { 987*790a779fSJames Molloy MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); 988*790a779fSJames Molloy auto It = InstrChanges.find(OldMI); 989*790a779fSJames Molloy if (It != InstrChanges.end()) { 990*790a779fSJames Molloy std::pair<unsigned, int64_t> RegAndOffset = It->second; 991*790a779fSJames Molloy unsigned BasePos, OffsetPos; 992*790a779fSJames Molloy if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) 993*790a779fSJames Molloy return nullptr; 994*790a779fSJames Molloy int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); 995*790a779fSJames Molloy MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); 996*790a779fSJames Molloy if (Schedule.getStage(LoopDef) > (signed)InstStageNum) 997*790a779fSJames Molloy NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); 998*790a779fSJames Molloy NewMI->getOperand(OffsetPos).setImm(NewOffset); 999*790a779fSJames Molloy } 1000*790a779fSJames Molloy updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); 1001*790a779fSJames Molloy return NewMI; 1002*790a779fSJames Molloy } 1003*790a779fSJames Molloy 1004*790a779fSJames Molloy /// Update the machine instruction with new virtual registers. This 1005*790a779fSJames Molloy /// function may change the defintions and/or uses. 1006*790a779fSJames Molloy void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, 1007*790a779fSJames Molloy bool LastDef, 1008*790a779fSJames Molloy unsigned CurStageNum, 1009*790a779fSJames Molloy unsigned InstrStageNum, 1010*790a779fSJames Molloy ValueMapTy *VRMap) { 1011*790a779fSJames Molloy for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 1012*790a779fSJames Molloy MachineOperand &MO = NewMI->getOperand(i); 1013*790a779fSJames Molloy if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 1014*790a779fSJames Molloy continue; 1015*790a779fSJames Molloy Register reg = MO.getReg(); 1016*790a779fSJames Molloy if (MO.isDef()) { 1017*790a779fSJames Molloy // Create a new virtual register for the definition. 1018*790a779fSJames Molloy const TargetRegisterClass *RC = MRI.getRegClass(reg); 1019*790a779fSJames Molloy Register NewReg = MRI.createVirtualRegister(RC); 1020*790a779fSJames Molloy MO.setReg(NewReg); 1021*790a779fSJames Molloy VRMap[CurStageNum][reg] = NewReg; 1022*790a779fSJames Molloy if (LastDef) 1023*790a779fSJames Molloy replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); 1024*790a779fSJames Molloy } else if (MO.isUse()) { 1025*790a779fSJames Molloy MachineInstr *Def = MRI.getVRegDef(reg); 1026*790a779fSJames Molloy // Compute the stage that contains the last definition for instruction. 1027*790a779fSJames Molloy int DefStageNum = Schedule.getStage(Def); 1028*790a779fSJames Molloy unsigned StageNum = CurStageNum; 1029*790a779fSJames Molloy if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { 1030*790a779fSJames Molloy // Compute the difference in stages between the defintion and the use. 1031*790a779fSJames Molloy unsigned StageDiff = (InstrStageNum - DefStageNum); 1032*790a779fSJames Molloy // Make an adjustment to get the last definition. 1033*790a779fSJames Molloy StageNum -= StageDiff; 1034*790a779fSJames Molloy } 1035*790a779fSJames Molloy if (VRMap[StageNum].count(reg)) 1036*790a779fSJames Molloy MO.setReg(VRMap[StageNum][reg]); 1037*790a779fSJames Molloy } 1038*790a779fSJames Molloy } 1039*790a779fSJames Molloy } 1040*790a779fSJames Molloy 1041*790a779fSJames Molloy /// Return the instruction in the loop that defines the register. 1042*790a779fSJames Molloy /// If the definition is a Phi, then follow the Phi operand to 1043*790a779fSJames Molloy /// the instruction in the loop. 1044*790a779fSJames Molloy MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) { 1045*790a779fSJames Molloy SmallPtrSet<MachineInstr *, 8> Visited; 1046*790a779fSJames Molloy MachineInstr *Def = MRI.getVRegDef(Reg); 1047*790a779fSJames Molloy while (Def->isPHI()) { 1048*790a779fSJames Molloy if (!Visited.insert(Def).second) 1049*790a779fSJames Molloy break; 1050*790a779fSJames Molloy for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) 1051*790a779fSJames Molloy if (Def->getOperand(i + 1).getMBB() == BB) { 1052*790a779fSJames Molloy Def = MRI.getVRegDef(Def->getOperand(i).getReg()); 1053*790a779fSJames Molloy break; 1054*790a779fSJames Molloy } 1055*790a779fSJames Molloy } 1056*790a779fSJames Molloy return Def; 1057*790a779fSJames Molloy } 1058*790a779fSJames Molloy 1059*790a779fSJames Molloy /// Return the new name for the value from the previous stage. 1060*790a779fSJames Molloy unsigned ModuloScheduleExpander::getPrevMapVal( 1061*790a779fSJames Molloy unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage, 1062*790a779fSJames Molloy ValueMapTy *VRMap, MachineBasicBlock *BB) { 1063*790a779fSJames Molloy unsigned PrevVal = 0; 1064*790a779fSJames Molloy if (StageNum > PhiStage) { 1065*790a779fSJames Molloy MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); 1066*790a779fSJames Molloy if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) 1067*790a779fSJames Molloy // The name is defined in the previous stage. 1068*790a779fSJames Molloy PrevVal = VRMap[StageNum - 1][LoopVal]; 1069*790a779fSJames Molloy else if (VRMap[StageNum].count(LoopVal)) 1070*790a779fSJames Molloy // The previous name is defined in the current stage when the instruction 1071*790a779fSJames Molloy // order is swapped. 1072*790a779fSJames Molloy PrevVal = VRMap[StageNum][LoopVal]; 1073*790a779fSJames Molloy else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) 1074*790a779fSJames Molloy // The loop value hasn't yet been scheduled. 1075*790a779fSJames Molloy PrevVal = LoopVal; 1076*790a779fSJames Molloy else if (StageNum == PhiStage + 1) 1077*790a779fSJames Molloy // The loop value is another phi, which has not been scheduled. 1078*790a779fSJames Molloy PrevVal = getInitPhiReg(*LoopInst, BB); 1079*790a779fSJames Molloy else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) 1080*790a779fSJames Molloy // The loop value is another phi, which has been scheduled. 1081*790a779fSJames Molloy PrevVal = 1082*790a779fSJames Molloy getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), 1083*790a779fSJames Molloy LoopStage, VRMap, BB); 1084*790a779fSJames Molloy } 1085*790a779fSJames Molloy return PrevVal; 1086*790a779fSJames Molloy } 1087*790a779fSJames Molloy 1088*790a779fSJames Molloy /// Rewrite the Phi values in the specified block to use the mappings 1089*790a779fSJames Molloy /// from the initial operand. Once the Phi is scheduled, we switch 1090*790a779fSJames Molloy /// to using the loop value instead of the Phi value, so those names 1091*790a779fSJames Molloy /// do not need to be rewritten. 1092*790a779fSJames Molloy void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB, 1093*790a779fSJames Molloy unsigned StageNum, 1094*790a779fSJames Molloy ValueMapTy *VRMap, 1095*790a779fSJames Molloy InstrMapTy &InstrMap) { 1096*790a779fSJames Molloy for (auto &PHI : BB->phis()) { 1097*790a779fSJames Molloy unsigned InitVal = 0; 1098*790a779fSJames Molloy unsigned LoopVal = 0; 1099*790a779fSJames Molloy getPhiRegs(PHI, BB, InitVal, LoopVal); 1100*790a779fSJames Molloy Register PhiDef = PHI.getOperand(0).getReg(); 1101*790a779fSJames Molloy 1102*790a779fSJames Molloy unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef)); 1103*790a779fSJames Molloy unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal)); 1104*790a779fSJames Molloy unsigned NumPhis = getStagesForPhi(PhiDef); 1105*790a779fSJames Molloy if (NumPhis > StageNum) 1106*790a779fSJames Molloy NumPhis = StageNum; 1107*790a779fSJames Molloy for (unsigned np = 0; np <= NumPhis; ++np) { 1108*790a779fSJames Molloy unsigned NewVal = 1109*790a779fSJames Molloy getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); 1110*790a779fSJames Molloy if (!NewVal) 1111*790a779fSJames Molloy NewVal = InitVal; 1112*790a779fSJames Molloy rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef, 1113*790a779fSJames Molloy NewVal); 1114*790a779fSJames Molloy } 1115*790a779fSJames Molloy } 1116*790a779fSJames Molloy } 1117*790a779fSJames Molloy 1118*790a779fSJames Molloy /// Rewrite a previously scheduled instruction to use the register value 1119*790a779fSJames Molloy /// from the new instruction. Make sure the instruction occurs in the 1120*790a779fSJames Molloy /// basic block, and we don't change the uses in the new instruction. 1121*790a779fSJames Molloy void ModuloScheduleExpander::rewriteScheduledInstr( 1122*790a779fSJames Molloy MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum, 1123*790a779fSJames Molloy unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg, 1124*790a779fSJames Molloy unsigned PrevReg) { 1125*790a779fSJames Molloy bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1); 1126*790a779fSJames Molloy int StagePhi = Schedule.getStage(Phi) + PhiNum; 1127*790a779fSJames Molloy // Rewrite uses that have been scheduled already to use the new 1128*790a779fSJames Molloy // Phi register. 1129*790a779fSJames Molloy for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), 1130*790a779fSJames Molloy EI = MRI.use_end(); 1131*790a779fSJames Molloy UI != EI;) { 1132*790a779fSJames Molloy MachineOperand &UseOp = *UI; 1133*790a779fSJames Molloy MachineInstr *UseMI = UseOp.getParent(); 1134*790a779fSJames Molloy ++UI; 1135*790a779fSJames Molloy if (UseMI->getParent() != BB) 1136*790a779fSJames Molloy continue; 1137*790a779fSJames Molloy if (UseMI->isPHI()) { 1138*790a779fSJames Molloy if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) 1139*790a779fSJames Molloy continue; 1140*790a779fSJames Molloy if (getLoopPhiReg(*UseMI, BB) != OldReg) 1141*790a779fSJames Molloy continue; 1142*790a779fSJames Molloy } 1143*790a779fSJames Molloy InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); 1144*790a779fSJames Molloy assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); 1145*790a779fSJames Molloy MachineInstr *OrigMI = OrigInstr->second; 1146*790a779fSJames Molloy int StageSched = Schedule.getStage(OrigMI); 1147*790a779fSJames Molloy int CycleSched = Schedule.getCycle(OrigMI); 1148*790a779fSJames Molloy unsigned ReplaceReg = 0; 1149*790a779fSJames Molloy // This is the stage for the scheduled instruction. 1150*790a779fSJames Molloy if (StagePhi == StageSched && Phi->isPHI()) { 1151*790a779fSJames Molloy int CyclePhi = Schedule.getCycle(Phi); 1152*790a779fSJames Molloy if (PrevReg && InProlog) 1153*790a779fSJames Molloy ReplaceReg = PrevReg; 1154*790a779fSJames Molloy else if (PrevReg && !isLoopCarried(*Phi) && 1155*790a779fSJames Molloy (CyclePhi <= CycleSched || OrigMI->isPHI())) 1156*790a779fSJames Molloy ReplaceReg = PrevReg; 1157*790a779fSJames Molloy else 1158*790a779fSJames Molloy ReplaceReg = NewReg; 1159*790a779fSJames Molloy } 1160*790a779fSJames Molloy // The scheduled instruction occurs before the scheduled Phi, and the 1161*790a779fSJames Molloy // Phi is not loop carried. 1162*790a779fSJames Molloy if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi)) 1163*790a779fSJames Molloy ReplaceReg = NewReg; 1164*790a779fSJames Molloy if (StagePhi > StageSched && Phi->isPHI()) 1165*790a779fSJames Molloy ReplaceReg = NewReg; 1166*790a779fSJames Molloy if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) 1167*790a779fSJames Molloy ReplaceReg = NewReg; 1168*790a779fSJames Molloy if (ReplaceReg) { 1169*790a779fSJames Molloy MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); 1170*790a779fSJames Molloy UseOp.setReg(ReplaceReg); 1171*790a779fSJames Molloy } 1172*790a779fSJames Molloy } 1173*790a779fSJames Molloy } 1174*790a779fSJames Molloy 1175*790a779fSJames Molloy bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { 1176*790a779fSJames Molloy if (!Phi.isPHI()) 1177*790a779fSJames Molloy return false; 1178*790a779fSJames Molloy unsigned DefCycle = Schedule.getCycle(&Phi); 1179*790a779fSJames Molloy int DefStage = Schedule.getStage(&Phi); 1180*790a779fSJames Molloy 1181*790a779fSJames Molloy unsigned InitVal = 0; 1182*790a779fSJames Molloy unsigned LoopVal = 0; 1183*790a779fSJames Molloy getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal); 1184*790a779fSJames Molloy MachineInstr *Use = MRI.getVRegDef(LoopVal); 1185*790a779fSJames Molloy if (!Use || Use->isPHI()) 1186*790a779fSJames Molloy return true; 1187*790a779fSJames Molloy unsigned LoopCycle = Schedule.getCycle(Use); 1188*790a779fSJames Molloy int LoopStage = Schedule.getStage(Use); 1189*790a779fSJames Molloy return (LoopCycle > DefCycle) || (LoopStage <= DefStage); 1190*790a779fSJames Molloy } 1191