1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/CodeGen/ModuloSchedule.h" 10 #include "llvm/ADT/StringExtras.h" 11 #include "llvm/CodeGen/LiveIntervals.h" 12 #include "llvm/CodeGen/MachineInstrBuilder.h" 13 #include "llvm/CodeGen/MachineRegisterInfo.h" 14 #include "llvm/CodeGen/TargetInstrInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/Support/Debug.h" 17 #include "llvm/Support/ErrorHandling.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #define DEBUG_TYPE "pipeliner" 21 using namespace llvm; 22 23 void ModuloSchedule::print(raw_ostream &OS) { 24 for (MachineInstr *MI : ScheduledInstrs) 25 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; 26 } 27 28 //===----------------------------------------------------------------------===// 29 // ModuloScheduleExpander implementation 30 //===----------------------------------------------------------------------===// 31 32 /// Return the register values for the operands of a Phi instruction. 33 /// This function assume the instruction is a Phi. 34 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, 35 unsigned &InitVal, unsigned &LoopVal) { 36 assert(Phi.isPHI() && "Expecting a Phi."); 37 38 InitVal = 0; 39 LoopVal = 0; 40 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 41 if (Phi.getOperand(i + 1).getMBB() != Loop) 42 InitVal = Phi.getOperand(i).getReg(); 43 else 44 LoopVal = Phi.getOperand(i).getReg(); 45 46 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); 47 } 48 49 /// Return the Phi register value that comes from the incoming block. 
50 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { 51 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 52 if (Phi.getOperand(i + 1).getMBB() != LoopBB) 53 return Phi.getOperand(i).getReg(); 54 return 0; 55 } 56 57 /// Return the Phi register value that comes the loop block. 58 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { 59 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) 60 if (Phi.getOperand(i + 1).getMBB() == LoopBB) 61 return Phi.getOperand(i).getReg(); 62 return 0; 63 } 64 65 void ModuloScheduleExpander::expand() { 66 BB = Schedule.getLoop()->getTopBlock(); 67 Preheader = *BB->pred_begin(); 68 if (Preheader == BB) 69 Preheader = *std::next(BB->pred_begin()); 70 71 // Iterate over the definitions in each instruction, and compute the 72 // stage difference for each use. Keep the maximum value. 73 for (MachineInstr *MI : Schedule.getInstructions()) { 74 int DefStage = Schedule.getStage(MI); 75 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { 76 MachineOperand &Op = MI->getOperand(i); 77 if (!Op.isReg() || !Op.isDef()) 78 continue; 79 80 Register Reg = Op.getReg(); 81 unsigned MaxDiff = 0; 82 bool PhiIsSwapped = false; 83 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), 84 EI = MRI.use_end(); 85 UI != EI; ++UI) { 86 MachineOperand &UseOp = *UI; 87 MachineInstr *UseMI = UseOp.getParent(); 88 int UseStage = Schedule.getStage(UseMI); 89 unsigned Diff = 0; 90 if (UseStage != -1 && UseStage >= DefStage) 91 Diff = UseStage - DefStage; 92 if (MI->isPHI()) { 93 if (isLoopCarried(*MI)) 94 ++Diff; 95 else 96 PhiIsSwapped = true; 97 } 98 MaxDiff = std::max(Diff, MaxDiff); 99 } 100 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); 101 } 102 } 103 104 generatePipelinedLoop(); 105 } 106 107 void ModuloScheduleExpander::generatePipelinedLoop() { 108 LoopInfo = TII->analyzeLoopForPipelining(BB); 109 assert(LoopInfo && "Must be able to analyze loop!"); 
110 111 // Create a new basic block for the kernel and add it to the CFG. 112 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); 113 114 unsigned MaxStageCount = Schedule.getNumStages() - 1; 115 116 // Remember the registers that are used in different stages. The index is 117 // the iteration, or stage, that the instruction is scheduled in. This is 118 // a map between register names in the original block and the names created 119 // in each stage of the pipelined loop. 120 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; 121 InstrMapTy InstrMap; 122 123 SmallVector<MachineBasicBlock *, 4> PrologBBs; 124 125 // Generate the prolog instructions that set up the pipeline. 126 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); 127 MF.insert(BB->getIterator(), KernelBB); 128 129 // Rearrange the instructions to generate the new, pipelined loop, 130 // and update register names as needed. 131 for (MachineInstr *CI : Schedule.getInstructions()) { 132 if (CI->isPHI()) 133 continue; 134 unsigned StageNum = Schedule.getStage(CI); 135 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum); 136 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap); 137 KernelBB->push_back(NewMI); 138 InstrMap[NewMI] = CI; 139 } 140 141 // Copy any terminator instructions to the new kernel, and update 142 // names as needed. 
143 for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), 144 E = BB->instr_end(); 145 I != E; ++I) { 146 MachineInstr *NewMI = MF.CloneMachineInstr(&*I); 147 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); 148 KernelBB->push_back(NewMI); 149 InstrMap[NewMI] = &*I; 150 } 151 152 NewKernel = KernelBB; 153 KernelBB->transferSuccessors(BB); 154 KernelBB->replaceSuccessor(BB, KernelBB); 155 156 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, 157 InstrMap, MaxStageCount, MaxStageCount, false); 158 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, 159 MaxStageCount, MaxStageCount, false); 160 161 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); 162 163 SmallVector<MachineBasicBlock *, 4> EpilogBBs; 164 // Generate the epilog instructions to complete the pipeline. 165 generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); 166 167 // We need this step because the register allocation doesn't handle some 168 // situations well, so we insert copies to help out. 169 splitLifetimes(KernelBB, EpilogBBs); 170 171 // Remove dead instructions due to loop induction variables. 172 removeDeadInstructions(KernelBB, EpilogBBs); 173 174 // Add branches between prolog and epilog blocks. 175 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); 176 177 delete[] VRMap; 178 } 179 180 void ModuloScheduleExpander::cleanup() { 181 // Remove the original loop since it's no longer referenced. 182 for (auto &I : *BB) 183 LIS.RemoveMachineInstrFromMaps(I); 184 BB->clear(); 185 BB->eraseFromParent(); 186 } 187 188 /// Generate the pipeline prolog code. 189 void ModuloScheduleExpander::generateProlog(unsigned LastStage, 190 MachineBasicBlock *KernelBB, 191 ValueMapTy *VRMap, 192 MBBVectorTy &PrologBBs) { 193 MachineBasicBlock *PredBB = Preheader; 194 InstrMapTy InstrMap; 195 196 // Generate a basic block for each stage, not including the last stage, 197 // which will be generated in the kernel. 
Each basic block may contain 198 // instructions from multiple stages/iterations. 199 for (unsigned i = 0; i < LastStage; ++i) { 200 // Create and insert the prolog basic block prior to the original loop 201 // basic block. The original loop is removed later. 202 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); 203 PrologBBs.push_back(NewBB); 204 MF.insert(BB->getIterator(), NewBB); 205 NewBB->transferSuccessors(PredBB); 206 PredBB->addSuccessor(NewBB); 207 PredBB = NewBB; 208 209 // Generate instructions for each appropriate stage. Process instructions 210 // in original program order. 211 for (int StageNum = i; StageNum >= 0; --StageNum) { 212 for (MachineBasicBlock::iterator BBI = BB->instr_begin(), 213 BBE = BB->getFirstTerminator(); 214 BBI != BBE; ++BBI) { 215 if (Schedule.getStage(&*BBI) == StageNum) { 216 if (BBI->isPHI()) 217 continue; 218 MachineInstr *NewMI = 219 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum); 220 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap); 221 NewBB->push_back(NewMI); 222 InstrMap[NewMI] = &*BBI; 223 } 224 } 225 } 226 rewritePhiValues(NewBB, i, VRMap, InstrMap); 227 LLVM_DEBUG({ 228 dbgs() << "prolog:\n"; 229 NewBB->dump(); 230 }); 231 } 232 233 PredBB->replaceSuccessor(BB, KernelBB); 234 235 // Check if we need to remove the branch from the preheader to the original 236 // loop, and replace it with a branch to the new loop. 237 unsigned numBranches = TII->removeBranch(*Preheader); 238 if (numBranches) { 239 SmallVector<MachineOperand, 0> Cond; 240 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc()); 241 } 242 } 243 244 /// Generate the pipeline epilog code. The epilog code finishes the iterations 245 /// that were started in either the prolog or the kernel. We create a basic 246 /// block for each stage that needs to complete. 
247 void ModuloScheduleExpander::generateEpilog(unsigned LastStage, 248 MachineBasicBlock *KernelBB, 249 ValueMapTy *VRMap, 250 MBBVectorTy &EpilogBBs, 251 MBBVectorTy &PrologBBs) { 252 // We need to change the branch from the kernel to the first epilog block, so 253 // this call to analyze branch uses the kernel rather than the original BB. 254 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 255 SmallVector<MachineOperand, 4> Cond; 256 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); 257 assert(!checkBranch && "generateEpilog must be able to analyze the branch"); 258 if (checkBranch) 259 return; 260 261 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); 262 if (*LoopExitI == KernelBB) 263 ++LoopExitI; 264 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); 265 MachineBasicBlock *LoopExitBB = *LoopExitI; 266 267 MachineBasicBlock *PredBB = KernelBB; 268 MachineBasicBlock *EpilogStart = LoopExitBB; 269 InstrMapTy InstrMap; 270 271 // Generate a basic block for each stage, not including the last stage, 272 // which was generated for the kernel. Each basic block may contain 273 // instructions from multiple stages/iterations. 274 int EpilogStage = LastStage + 1; 275 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { 276 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); 277 EpilogBBs.push_back(NewBB); 278 MF.insert(BB->getIterator(), NewBB); 279 280 PredBB->replaceSuccessor(LoopExitBB, NewBB); 281 NewBB->addSuccessor(LoopExitBB); 282 283 if (EpilogStart == LoopExitBB) 284 EpilogStart = NewBB; 285 286 // Add instructions to the epilog depending on the current block. 287 // Process instructions in original program order. 
288 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { 289 for (auto &BBI : *BB) { 290 if (BBI.isPHI()) 291 continue; 292 MachineInstr *In = &BBI; 293 if ((unsigned)Schedule.getStage(In) == StageNum) { 294 // Instructions with memoperands in the epilog are updated with 295 // conservative values. 296 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); 297 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap); 298 NewBB->push_back(NewMI); 299 InstrMap[NewMI] = In; 300 } 301 } 302 } 303 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, 304 InstrMap, LastStage, EpilogStage, i == 1); 305 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, 306 LastStage, EpilogStage, i == 1); 307 PredBB = NewBB; 308 309 LLVM_DEBUG({ 310 dbgs() << "epilog:\n"; 311 NewBB->dump(); 312 }); 313 } 314 315 // Fix any Phi nodes in the loop exit block. 316 LoopExitBB->replacePhiUsesWith(BB, PredBB); 317 318 // Create a branch to the new epilog from the kernel. 319 // Remove the original branch and add a new branch to the epilog. 320 TII->removeBranch(*KernelBB); 321 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); 322 // Add a branch to the loop exit. 323 if (EpilogBBs.size() > 0) { 324 MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); 325 SmallVector<MachineOperand, 4> Cond1; 326 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); 327 } 328 } 329 330 /// Replace all uses of FromReg that appear outside the specified 331 /// basic block with ToReg. 
/// After the rewrite, an empty live interval is created for ToReg if LIS
/// does not already have one.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
                                    MachineBasicBlock *MBB,
                                    MachineRegisterInfo &MRI,
                                    LiveIntervals &LIS) {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
                                         E = MRI.use_end();
       I != E;) {
    MachineOperand &O = *I;
    // Advance before the operand is possibly rewritten; setReg presumably
    // unlinks it from FromReg's use list (NOTE(review): confirm).
    ++I;
    if (O.getParent()->getParent() != MBB)
      O.setReg(ToReg);
  }
  if (!LIS.hasInterval(ToReg))
    LIS.createEmptyInterval(ToReg);
}

/// Return true if the register has a use that occurs outside the
/// specified loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
                            MachineRegisterInfo &MRI) {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
                                         E = MRI.use_end();
       I != E; ++I)
    if (I->getParent()->getParent() != BB)
      return true;
  return false;
}

/// Generate Phis for the specific block in the generated pipelined code.
/// This function looks at the Phis from the original code to guide the
/// creation of new Phis.
///
/// \param NewBB        Block that receives the new Phis (inserted before its
///                     first non-Phi instruction).
/// \param BB1          Incoming block recorded for the first Phi operand.
/// \param BB2          Incoming block recorded for the second Phi operand.
/// \param KernelBB     The kernel block; Phis found there are looked through
///                     to their initial value.
/// \param VRMap        Array of per-stage maps from original register names
///                     to the names created for that stage.
/// \param InstrMap     Map from new instructions to the original instruction
///                     they were created from.
/// \param LastStageNum Index of the last stage.
/// \param CurStageNum  Stage being generated; equal to \p LastStageNum when
///                     \p NewBB is the kernel, larger for epilog blocks.
/// \param IsLast       When true, uses outside the original loop block are
///                     renamed to the final Phi value.
void ModuloScheduleExpander::generateExistingPhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
  // Compute the stage number for the initial value of the Phi, which
  // comes from the prolog. The prolog to use depends on which kernel/
  // epilog block we're adding the Phi to.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  bool InKernel = (LastStageNum == CurStageNum);
  if (InKernel) {
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  } else {
    PrologStage = LastStageNum - (CurStageNum - LastStageNum);
    PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
  }

  // Visit every Phi at the top of the original loop block.
  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
                                   BBE = BB->getFirstNonPHI();
       BBI != BBE; ++BBI) {
    Register Def = BBI->getOperand(0).getReg();

    unsigned InitVal = 0;
    unsigned LoopVal = 0;
    getPhiRegs(*BBI, BB, InitVal, LoopVal);

    unsigned PhiOp1 = 0;
    // The Phi value from the loop body typically is defined in the loop, but
    // not always. So, we need to check if the value is defined in the loop.
    unsigned PhiOp2 = LoopVal;
    if (VRMap[LastStageNum].count(LoopVal))
      PhiOp2 = VRMap[LastStageNum][LoopVal];

    int StageScheduled = Schedule.getStage(&*BBI);
    int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
    unsigned NumStages = getStagesForReg(Def, CurStageNum);
    if (NumStages == 0) {
      // We don't need to generate a Phi anymore, but we need to rename any uses
      // of the Phi value.
      // NOTE(review): this lookup uses operator[] without a preceding
      // count() check; presumably it default-inserts when LoopVal is absent
      // (ValueMapTy is not visible here) — confirm.
      unsigned NewReg = VRMap[PrevStage][LoopVal];
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
                            InitVal, NewReg);
      if (VRMap[CurStageNum].count(LoopVal))
        VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
    }
    // Adjust the number of Phis needed depending on the number of prologs left,
    // and the distance from where the Phi is first scheduled. The number of
    // Phis cannot exceed the number of prolog stages. Each stage can
    // potentially define two values.
    unsigned MaxPhis = PrologStage + 2;
    if (!InKernel && (int)PrologStage <= LoopValStage)
      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
    unsigned NumPhis = std::min(NumStages, MaxPhis);

    unsigned NewReg = 0;
    // Prefer the stage of the loop value's definition; fall back to the
    // Phi's own stage when that definition has no stage (-1).
    unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
    // In the epilog, we may need to look back one stage to get the correct
    // Phi name because the epilog and prolog blocks execute the same stage.
    // The correct name is from the previous block only when the Phi has
    // been completely scheduled prior to the epilog, and Phi value is not
    // needed in multiple stages.
    int StageDiff = 0;
    if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
        NumPhis == 1)
      StageDiff = 1;
    // Adjust the computations below when the phi and the loop definition
    // are scheduled in different stages.
    if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
      StageDiff = StageScheduled - LoopValStage;
    for (unsigned np = 0; np < NumPhis; ++np) {
      // If the Phi hasn't been scheduled, then use the initial Phi operand
      // value. Otherwise, use the scheduled version of the instruction. This
      // is a little complicated when a Phi references another Phi.
      if (np > PrologStage || StageScheduled >= (int)LastStageNum)
        PhiOp1 = InitVal;
      // Check if the Phi has already been scheduled in a prolog stage.
      else if (PrologStage >= AccessStage + StageDiff + np &&
               VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
        PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
      // Check if the Phi has already been scheduled, but the loop instruction
      // is either another Phi, or doesn't occur in the loop.
      else if (PrologStage >= AccessStage + StageDiff + np) {
        // If the Phi references another Phi, we need to examine the other
        // Phi to get the correct value.
        PhiOp1 = LoopVal;
        MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
        // Counts how many Phis in the original block have been looked
        // through so far.
        int Indirects = 1;
        while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
          int PhiStage = Schedule.getStage(InstOp1);
          if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
            PhiOp1 = getInitPhiReg(*InstOp1, BB);
          else
            PhiOp1 = getLoopPhiReg(*InstOp1, BB);
          InstOp1 = MRI.getVRegDef(PhiOp1);
          int PhiOpStage = Schedule.getStage(InstOp1);
          int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
          if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
              VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
            PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
            break;
          }
          ++Indirects;
        }
      } else
        PhiOp1 = InitVal;
      // If this references a generated Phi in the kernel, get the Phi operand
      // from the incoming block.
      if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
        if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
          PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);

      MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
      bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
      // In the epilog, a map lookup is needed to get the value from the kernel,
      // or previous epilog block. How this is done depends on whether the
      // instruction is scheduled in the previous block.
      if (!InKernel) {
        int StageDiffAdj = 0;
        if (LoopValStage != -1 && StageScheduled > LoopValStage)
          StageDiffAdj = StageScheduled - LoopValStage;
        // Use the loop value defined in the kernel, unless the kernel
        // contains the last definition of the Phi.
        if (np == 0 && PrevStage == LastStageNum &&
            (StageScheduled != 0 || LoopValStage != 0) &&
            VRMap[PrevStage - StageDiffAdj].count(LoopVal))
          PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
        // Use the value defined by the Phi. We add one because we switch
        // from looking at the loop value to the Phi definition.
        else if (np > 0 && PrevStage == LastStageNum &&
                 VRMap[PrevStage - np + 1].count(Def))
          PhiOp2 = VRMap[PrevStage - np + 1][Def];
        // Use the loop value defined in the kernel.
        else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
                 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
          PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
        // Use the value defined by the Phi, unless we're generating the first
        // epilog and the Phi refers to a Phi in a different stage.
        else if (VRMap[PrevStage - np].count(Def) &&
                 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
                  (LoopValStage == StageScheduled)))
          PhiOp2 = VRMap[PrevStage - np][Def];
      }

      // Check if we can reuse an existing Phi. This occurs when a Phi
      // references another Phi, and the other Phi is scheduled in an
      // earlier stage. We can try to reuse an existing Phi up until the last
      // stage of the current Phi.
      if (LoopDefIsPhi) {
        if (static_cast<int>(PrologStage - np) >= StageScheduled) {
          int LVNumStages = getStagesForPhi(LoopVal);
          // Note: this StageDiff shadows the outer StageDiff within this
          // inner block only.
          int StageDiff = (StageScheduled - LoopValStage);
          LVNumStages -= StageDiff;
          // Make sure the loop value Phi has been processed already.
          if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
            NewReg = PhiOp2;
            unsigned ReuseStage = CurStageNum;
            if (isLoopCarried(*PhiInst))
              ReuseStage -= LVNumStages;
            // Check if the Phi to reuse has been generated yet. If not, then
            // there is nothing to reuse.
            if (VRMap[ReuseStage - np].count(LoopVal)) {
              NewReg = VRMap[ReuseStage - np][LoopVal];

              rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
                                    Def, NewReg);
              // Update the map with the new Phi name.
              VRMap[CurStageNum - np][Def] = NewReg;
              PhiOp2 = NewReg;
              if (VRMap[LastStageNum - np - 1].count(LoopVal))
                PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

              if (IsLast && np == NumPhis - 1)
                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
              // An existing Phi was reused, so no new Phi is built for
              // this np.
              continue;
            }
          }
        }
        if (InKernel && StageDiff > 0 &&
            VRMap[CurStageNum - StageDiff - np].count(LoopVal))
          PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
      }

      const TargetRegisterClass *RC = MRI.getRegClass(Def);
      NewReg = MRI.createVirtualRegister(RC);

      MachineInstrBuilder NewPhi =
          BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                  TII->get(TargetOpcode::PHI), NewReg);
      NewPhi.addReg(PhiOp1).addMBB(BB1);
      NewPhi.addReg(PhiOp2).addMBB(BB2);
      // Only the first Phi generated for this original Phi is recorded in
      // the instruction map.
      if (np == 0)
        InstrMap[NewPhi] = &*BBI;

      // We define the Phis after creating the new pipelined code, so
      // we need to rename the Phi values in scheduled instructions.

      unsigned PrevReg = 0;
      if (InKernel && VRMap[PrevStage - np].count(LoopVal))
        PrevReg = VRMap[PrevStage - np][LoopVal];
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
                            NewReg, PrevReg);
      // If the Phi has been scheduled, use the new name for rewriting.
      if (VRMap[CurStageNum - np].count(Def)) {
        unsigned R = VRMap[CurStageNum - np][Def];
        rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
                              NewReg);
      }

      // Check if we need to rename any uses that occur after the loop. The
      // register to replace depends on whether the Phi is scheduled in the
      // epilog.
      if (IsLast && np == NumPhis - 1)
        replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);

      // In the kernel, a dependent Phi uses the value from this Phi.
      if (InKernel)
        PhiOp2 = NewReg;

      // Update the map with the new Phi name.
      VRMap[CurStageNum - np][Def] = NewReg;
    }

    // For the stages beyond the Phis that were generated, rewrite uses to
    // the last register produced above. NumPhis is incremented before each
    // call, so the calls see NumPhis + 1 .. NumStages.
    while (NumPhis++ < NumStages) {
      rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
                            NewReg, 0);
    }

    // Check if we need to rename a Phi that has been eliminated due to
    // scheduling.
    if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
      replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
  }
}

/// Generate Phis for the specified block in the generated pipelined code.
/// These are new Phis needed because the definition is scheduled after the
/// use in the pipelined sequence.
///
/// \param NewBB        Block that receives the new Phis (inserted before its
///                     first non-Phi instruction).
/// \param BB1          Incoming block recorded for the first Phi operand.
/// \param BB2          Incoming block recorded for the second Phi operand.
/// \param KernelBB     The kernel block; Phis found there are looked through
///                     to their initial value.
/// \param VRMap        Array of per-stage maps from original register names
///                     to the names created for that stage.
/// \param InstrMap     Map from new instructions to the original instruction
///                     they were created from.
/// \param LastStageNum Index of the last stage.
/// \param CurStageNum  Stage being generated; equal to \p LastStageNum when
///                     \p NewBB is the kernel, larger for epilog blocks.
/// \param IsLast       When true, uses outside the original loop block are
///                     renamed to the final Phi value.
void ModuloScheduleExpander::generatePhis(
    MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
  // Compute the stage number that contains the initial Phi value, and
  // the Phi from the previous stage.
  unsigned PrologStage = 0;
  unsigned PrevStage = 0;
  unsigned StageDiff = CurStageNum - LastStageNum;
  bool InKernel = (StageDiff == 0);
  if (InKernel) {
    PrologStage = LastStageNum - 1;
    PrevStage = CurStageNum;
  } else {
    PrologStage = LastStageNum - StageDiff;
    PrevStage = LastStageNum + StageDiff - 1;
  }

  // Visit every non-Phi instruction of the original loop block and each
  // virtual register it defines.
  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
                                   BBE = BB->instr_end();
       BBI != BBE; ++BBI) {
    for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = BBI->getOperand(i);
      if (!MO.isReg() || !MO.isDef() ||
          !Register::isVirtualRegister(MO.getReg()))
        continue;

      int StageScheduled = Schedule.getStage(&*BBI);
      assert(StageScheduled != -1 && "Expecting scheduled instruction.");
      Register Def = MO.getReg();
      unsigned NumPhis = getStagesForReg(Def, CurStageNum);
      // An instruction scheduled in stage 0 and is used after the loop
      // requires a phi in the epilog for the last definition from either
      // the kernel or prolog.
      if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
          hasUseAfterLoop(Def, BB, MRI))
        NumPhis = 1;
      if (!InKernel && (unsigned)StageScheduled > PrologStage)
        continue;

      // NOTE(review): operator[] lookups on VRMap below are not guarded by
      // count(); presumably they default-insert when Def is absent
      // (ValueMapTy is not visible here) — confirm.
      unsigned PhiOp2 = VRMap[PrevStage][Def];
      if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
        if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
          PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
      // The number of Phis can't exceed the number of prolog stages. The
      // prolog stage number is zero based.
      if (NumPhis > PrologStage + 1 - StageScheduled)
        NumPhis = PrologStage + 1 - StageScheduled;
      for (unsigned np = 0; np < NumPhis; ++np) {
        unsigned PhiOp1 = VRMap[PrologStage][Def];
        if (np <= PrologStage)
          PhiOp1 = VRMap[PrologStage - np][Def];
        // If the initial value is itself a Phi in the kernel or in NewBB,
        // use that Phi's initial operand instead.
        if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
          if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
            PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
          if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
            PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
        }
        if (!InKernel)
          PhiOp2 = VRMap[PrevStage - np][Def];

        const TargetRegisterClass *RC = MRI.getRegClass(Def);
        Register NewReg = MRI.createVirtualRegister(RC);

        MachineInstrBuilder NewPhi =
            BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
                    TII->get(TargetOpcode::PHI), NewReg);
        NewPhi.addReg(PhiOp1).addMBB(BB1);
        NewPhi.addReg(PhiOp2).addMBB(BB2);
        // Only the first Phi generated for this definition is recorded in
        // the instruction map.
        if (np == 0)
          InstrMap[NewPhi] = &*BBI;

        // Rewrite uses and update the map. The actions depend upon whether
        // we are generating code for the kernel or epilog blocks.
        if (InKernel) {
          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
                                NewReg);
          rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
                                NewReg);

          // The next Phi generated for this definition takes this Phi's
          // result as its second operand.
          PhiOp2 = NewReg;
          VRMap[PrevStage - np - 1][Def] = NewReg;
        } else {
          VRMap[CurStageNum - np][Def] = NewReg;
          if (np == NumPhis - 1)
            rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
                                  NewReg);
        }
        if (IsLast && np == NumPhis - 1)
          replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
      }
    }
  }
}

/// Remove instructions that generate values with no uses.
/// Typically, these are induction variable operations that generate values
/// used in the loop itself. A dead instruction has a definition with
/// no uses, or uses that occur in the original loop only.
700 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, 701 MBBVectorTy &EpilogBBs) { 702 // For each epilog block, check that the value defined by each instruction 703 // is used. If not, delete it. 704 for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), 705 MBE = EpilogBBs.rend(); 706 MBB != MBE; ++MBB) 707 for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), 708 ME = (*MBB)->instr_rend(); 709 MI != ME;) { 710 // From DeadMachineInstructionElem. Don't delete inline assembly. 711 if (MI->isInlineAsm()) { 712 ++MI; 713 continue; 714 } 715 bool SawStore = false; 716 // Check if it's safe to remove the instruction due to side effects. 717 // We can, and want to, remove Phis here. 718 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { 719 ++MI; 720 continue; 721 } 722 bool used = true; 723 for (MachineInstr::mop_iterator MOI = MI->operands_begin(), 724 MOE = MI->operands_end(); 725 MOI != MOE; ++MOI) { 726 if (!MOI->isReg() || !MOI->isDef()) 727 continue; 728 Register reg = MOI->getReg(); 729 // Assume physical registers are used, unless they are marked dead. 730 if (Register::isPhysicalRegister(reg)) { 731 used = !MOI->isDead(); 732 if (used) 733 break; 734 continue; 735 } 736 unsigned realUses = 0; 737 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), 738 EI = MRI.use_end(); 739 UI != EI; ++UI) { 740 // Check if there are any uses that occur only in the original 741 // loop. If so, that's not a real use. 742 if (UI->getParent()->getParent() != BB) { 743 realUses++; 744 used = true; 745 break; 746 } 747 } 748 if (realUses > 0) 749 break; 750 used = false; 751 } 752 if (!used) { 753 LIS.RemoveMachineInstrFromMaps(*MI); 754 MI++->eraseFromParent(); 755 continue; 756 } 757 ++MI; 758 } 759 // In the kernel block, check if we can remove a Phi that generates a value 760 // used in an instruction removed in the epilog block. 
761 for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), 762 BBE = KernelBB->getFirstNonPHI(); 763 BBI != BBE;) { 764 MachineInstr *MI = &*BBI; 765 ++BBI; 766 Register reg = MI->getOperand(0).getReg(); 767 if (MRI.use_begin(reg) == MRI.use_end()) { 768 LIS.RemoveMachineInstrFromMaps(*MI); 769 MI->eraseFromParent(); 770 } 771 } 772 } 773 774 /// For loop carried definitions, we split the lifetime of a virtual register 775 /// that has uses past the definition in the next iteration. A copy with a new 776 /// virtual register is inserted before the definition, which helps with 777 /// generating a better register assignment. 778 /// 779 /// v1 = phi(a, v2) v1 = phi(a, v2) 780 /// v2 = phi(b, v3) v2 = phi(b, v3) 781 /// v3 = .. v4 = copy v1 782 /// .. = V1 v3 = .. 783 /// .. = v4 784 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, 785 MBBVectorTy &EpilogBBs) { 786 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 787 for (auto &PHI : KernelBB->phis()) { 788 Register Def = PHI.getOperand(0).getReg(); 789 // Check for any Phi definition that used as an operand of another Phi 790 // in the same block. 791 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), 792 E = MRI.use_instr_end(); 793 I != E; ++I) { 794 if (I->isPHI() && I->getParent() == KernelBB) { 795 // Get the loop carried definition. 796 unsigned LCDef = getLoopPhiReg(PHI, KernelBB); 797 if (!LCDef) 798 continue; 799 MachineInstr *MI = MRI.getVRegDef(LCDef); 800 if (!MI || MI->getParent() != KernelBB || MI->isPHI()) 801 continue; 802 // Search through the rest of the block looking for uses of the Phi 803 // definition. If one occurs, then split the lifetime. 804 unsigned SplitReg = 0; 805 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), 806 KernelBB->instr_end())) 807 if (BBJ.readsRegister(Def)) { 808 // We split the lifetime when we find the first use. 
809 if (SplitReg == 0) { 810 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); 811 BuildMI(*KernelBB, MI, MI->getDebugLoc(), 812 TII->get(TargetOpcode::COPY), SplitReg) 813 .addReg(Def); 814 } 815 BBJ.substituteRegister(Def, SplitReg, 0, *TRI); 816 } 817 if (!SplitReg) 818 continue; 819 // Search through each of the epilog blocks for any uses to be renamed. 820 for (auto &Epilog : EpilogBBs) 821 for (auto &I : *Epilog) 822 if (I.readsRegister(Def)) 823 I.substituteRegister(Def, SplitReg, 0, *TRI); 824 break; 825 } 826 } 827 } 828 } 829 830 /// Remove the incoming block from the Phis in a basic block. 831 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { 832 for (MachineInstr &MI : *BB) { 833 if (!MI.isPHI()) 834 break; 835 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) 836 if (MI.getOperand(i + 1).getMBB() == Incoming) { 837 MI.RemoveOperand(i + 1); 838 MI.RemoveOperand(i); 839 break; 840 } 841 } 842 } 843 844 /// Create branches from each prolog basic block to the appropriate epilog 845 /// block. These edges are needed if the loop ends before reaching the 846 /// kernel. 847 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, 848 MBBVectorTy &PrologBBs, 849 MachineBasicBlock *KernelBB, 850 MBBVectorTy &EpilogBBs, 851 ValueMapTy *VRMap) { 852 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); 853 MachineBasicBlock *LastPro = KernelBB; 854 MachineBasicBlock *LastEpi = KernelBB; 855 856 // Start from the blocks connected to the kernel and work "out" 857 // to the first prolog and the last epilog blocks. 858 SmallVector<MachineInstr *, 4> PrevInsts; 859 unsigned MaxIter = PrologBBs.size() - 1; 860 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { 861 // Add branches to the prolog that go to the corresponding 862 // epilog, and the fall-thru prolog/kernel block. 
863 MachineBasicBlock *Prolog = PrologBBs[j]; 864 MachineBasicBlock *Epilog = EpilogBBs[i]; 865 866 SmallVector<MachineOperand, 4> Cond; 867 Optional<bool> StaticallyGreater = 868 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); 869 unsigned numAdded = 0; 870 if (!StaticallyGreater.hasValue()) { 871 Prolog->addSuccessor(Epilog); 872 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); 873 } else if (*StaticallyGreater == false) { 874 Prolog->addSuccessor(Epilog); 875 Prolog->removeSuccessor(LastPro); 876 LastEpi->removeSuccessor(Epilog); 877 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); 878 removePhis(Epilog, LastEpi); 879 // Remove the blocks that are no longer referenced. 880 if (LastPro != LastEpi) { 881 LastEpi->clear(); 882 LastEpi->eraseFromParent(); 883 } 884 if (LastPro == KernelBB) { 885 LoopInfo->disposed(); 886 NewKernel = nullptr; 887 } 888 LastPro->clear(); 889 LastPro->eraseFromParent(); 890 } else { 891 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); 892 removePhis(Epilog, Prolog); 893 } 894 LastPro = Prolog; 895 LastEpi = Epilog; 896 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), 897 E = Prolog->instr_rend(); 898 I != E && numAdded > 0; ++I, --numAdded) 899 updateInstruction(&*I, false, j, 0, VRMap); 900 } 901 902 if (NewKernel) { 903 LoopInfo->setPreheader(PrologBBs[MaxIter]); 904 LoopInfo->adjustTripCount(-(MaxIter + 1)); 905 } 906 } 907 908 /// Return true if we can compute the amount the instruction changes 909 /// during each iteration. Set Delta to the amount of the change. 
910 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { 911 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 912 const MachineOperand *BaseOp; 913 int64_t Offset; 914 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) 915 return false; 916 917 if (!BaseOp->isReg()) 918 return false; 919 920 Register BaseReg = BaseOp->getReg(); 921 922 MachineRegisterInfo &MRI = MF.getRegInfo(); 923 // Check if there is a Phi. If so, get the definition in the loop. 924 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); 925 if (BaseDef && BaseDef->isPHI()) { 926 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); 927 BaseDef = MRI.getVRegDef(BaseReg); 928 } 929 if (!BaseDef) 930 return false; 931 932 int D = 0; 933 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) 934 return false; 935 936 Delta = D; 937 return true; 938 } 939 940 /// Update the memory operand with a new offset when the pipeliner 941 /// generates a new copy of the instruction that refers to a 942 /// different memory location. 943 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI, 944 MachineInstr &OldMI, 945 unsigned Num) { 946 if (Num == 0) 947 return; 948 // If the instruction has memory operands, then adjust the offset 949 // when the instruction appears in different stages. 950 if (NewMI.memoperands_empty()) 951 return; 952 SmallVector<MachineMemOperand *, 2> NewMMOs; 953 for (MachineMemOperand *MMO : NewMI.memoperands()) { 954 // TODO: Figure out whether isAtomic is really necessary (see D57601). 
955 if (MMO->isVolatile() || MMO->isAtomic() || 956 (MMO->isInvariant() && MMO->isDereferenceable()) || 957 (!MMO->getValue())) { 958 NewMMOs.push_back(MMO); 959 continue; 960 } 961 unsigned Delta; 962 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { 963 int64_t AdjOffset = Delta * Num; 964 NewMMOs.push_back( 965 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); 966 } else { 967 NewMMOs.push_back( 968 MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); 969 } 970 } 971 NewMI.setMemRefs(MF, NewMMOs); 972 } 973 974 /// Clone the instruction for the new pipelined loop and update the 975 /// memory operands, if needed. 976 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, 977 unsigned CurStageNum, 978 unsigned InstStageNum) { 979 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); 980 // Check for tied operands in inline asm instructions. This should be handled 981 // elsewhere, but I'm not sure of the best solution. 982 if (OldMI->isInlineAsm()) 983 for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { 984 const auto &MO = OldMI->getOperand(i); 985 if (MO.isReg() && MO.isUse()) 986 break; 987 unsigned UseIdx; 988 if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) 989 NewMI->tieOperands(i, UseIdx); 990 } 991 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); 992 return NewMI; 993 } 994 995 /// Clone the instruction for the new pipelined loop. If needed, this 996 /// function updates the instruction using the values saved in the 997 /// InstrChanges structure. 
998 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr( 999 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) { 1000 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); 1001 auto It = InstrChanges.find(OldMI); 1002 if (It != InstrChanges.end()) { 1003 std::pair<unsigned, int64_t> RegAndOffset = It->second; 1004 unsigned BasePos, OffsetPos; 1005 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) 1006 return nullptr; 1007 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); 1008 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); 1009 if (Schedule.getStage(LoopDef) > (signed)InstStageNum) 1010 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); 1011 NewMI->getOperand(OffsetPos).setImm(NewOffset); 1012 } 1013 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); 1014 return NewMI; 1015 } 1016 1017 /// Update the machine instruction with new virtual registers. This 1018 /// function may change the defintions and/or uses. 1019 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, 1020 bool LastDef, 1021 unsigned CurStageNum, 1022 unsigned InstrStageNum, 1023 ValueMapTy *VRMap) { 1024 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 1025 MachineOperand &MO = NewMI->getOperand(i); 1026 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) 1027 continue; 1028 Register reg = MO.getReg(); 1029 if (MO.isDef()) { 1030 // Create a new virtual register for the definition. 1031 const TargetRegisterClass *RC = MRI.getRegClass(reg); 1032 Register NewReg = MRI.createVirtualRegister(RC); 1033 MO.setReg(NewReg); 1034 VRMap[CurStageNum][reg] = NewReg; 1035 if (LastDef) 1036 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); 1037 } else if (MO.isUse()) { 1038 MachineInstr *Def = MRI.getVRegDef(reg); 1039 // Compute the stage that contains the last definition for instruction. 
1040 int DefStageNum = Schedule.getStage(Def); 1041 unsigned StageNum = CurStageNum; 1042 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { 1043 // Compute the difference in stages between the defintion and the use. 1044 unsigned StageDiff = (InstrStageNum - DefStageNum); 1045 // Make an adjustment to get the last definition. 1046 StageNum -= StageDiff; 1047 } 1048 if (VRMap[StageNum].count(reg)) 1049 MO.setReg(VRMap[StageNum][reg]); 1050 } 1051 } 1052 } 1053 1054 /// Return the instruction in the loop that defines the register. 1055 /// If the definition is a Phi, then follow the Phi operand to 1056 /// the instruction in the loop. 1057 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) { 1058 SmallPtrSet<MachineInstr *, 8> Visited; 1059 MachineInstr *Def = MRI.getVRegDef(Reg); 1060 while (Def->isPHI()) { 1061 if (!Visited.insert(Def).second) 1062 break; 1063 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) 1064 if (Def->getOperand(i + 1).getMBB() == BB) { 1065 Def = MRI.getVRegDef(Def->getOperand(i).getReg()); 1066 break; 1067 } 1068 } 1069 return Def; 1070 } 1071 1072 /// Return the new name for the value from the previous stage. 1073 unsigned ModuloScheduleExpander::getPrevMapVal( 1074 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage, 1075 ValueMapTy *VRMap, MachineBasicBlock *BB) { 1076 unsigned PrevVal = 0; 1077 if (StageNum > PhiStage) { 1078 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); 1079 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) 1080 // The name is defined in the previous stage. 1081 PrevVal = VRMap[StageNum - 1][LoopVal]; 1082 else if (VRMap[StageNum].count(LoopVal)) 1083 // The previous name is defined in the current stage when the instruction 1084 // order is swapped. 1085 PrevVal = VRMap[StageNum][LoopVal]; 1086 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) 1087 // The loop value hasn't yet been scheduled. 
1088 PrevVal = LoopVal; 1089 else if (StageNum == PhiStage + 1) 1090 // The loop value is another phi, which has not been scheduled. 1091 PrevVal = getInitPhiReg(*LoopInst, BB); 1092 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) 1093 // The loop value is another phi, which has been scheduled. 1094 PrevVal = 1095 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), 1096 LoopStage, VRMap, BB); 1097 } 1098 return PrevVal; 1099 } 1100 1101 /// Rewrite the Phi values in the specified block to use the mappings 1102 /// from the initial operand. Once the Phi is scheduled, we switch 1103 /// to using the loop value instead of the Phi value, so those names 1104 /// do not need to be rewritten. 1105 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB, 1106 unsigned StageNum, 1107 ValueMapTy *VRMap, 1108 InstrMapTy &InstrMap) { 1109 for (auto &PHI : BB->phis()) { 1110 unsigned InitVal = 0; 1111 unsigned LoopVal = 0; 1112 getPhiRegs(PHI, BB, InitVal, LoopVal); 1113 Register PhiDef = PHI.getOperand(0).getReg(); 1114 1115 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef)); 1116 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal)); 1117 unsigned NumPhis = getStagesForPhi(PhiDef); 1118 if (NumPhis > StageNum) 1119 NumPhis = StageNum; 1120 for (unsigned np = 0; np <= NumPhis; ++np) { 1121 unsigned NewVal = 1122 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); 1123 if (!NewVal) 1124 NewVal = InitVal; 1125 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef, 1126 NewVal); 1127 } 1128 } 1129 } 1130 1131 /// Rewrite a previously scheduled instruction to use the register value 1132 /// from the new instruction. Make sure the instruction occurs in the 1133 /// basic block, and we don't change the uses in the new instruction. 
1134 void ModuloScheduleExpander::rewriteScheduledInstr( 1135 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum, 1136 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg, 1137 unsigned PrevReg) { 1138 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1); 1139 int StagePhi = Schedule.getStage(Phi) + PhiNum; 1140 // Rewrite uses that have been scheduled already to use the new 1141 // Phi register. 1142 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), 1143 EI = MRI.use_end(); 1144 UI != EI;) { 1145 MachineOperand &UseOp = *UI; 1146 MachineInstr *UseMI = UseOp.getParent(); 1147 ++UI; 1148 if (UseMI->getParent() != BB) 1149 continue; 1150 if (UseMI->isPHI()) { 1151 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) 1152 continue; 1153 if (getLoopPhiReg(*UseMI, BB) != OldReg) 1154 continue; 1155 } 1156 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); 1157 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); 1158 MachineInstr *OrigMI = OrigInstr->second; 1159 int StageSched = Schedule.getStage(OrigMI); 1160 int CycleSched = Schedule.getCycle(OrigMI); 1161 unsigned ReplaceReg = 0; 1162 // This is the stage for the scheduled instruction. 1163 if (StagePhi == StageSched && Phi->isPHI()) { 1164 int CyclePhi = Schedule.getCycle(Phi); 1165 if (PrevReg && InProlog) 1166 ReplaceReg = PrevReg; 1167 else if (PrevReg && !isLoopCarried(*Phi) && 1168 (CyclePhi <= CycleSched || OrigMI->isPHI())) 1169 ReplaceReg = PrevReg; 1170 else 1171 ReplaceReg = NewReg; 1172 } 1173 // The scheduled instruction occurs before the scheduled Phi, and the 1174 // Phi is not loop carried. 
1175 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi)) 1176 ReplaceReg = NewReg; 1177 if (StagePhi > StageSched && Phi->isPHI()) 1178 ReplaceReg = NewReg; 1179 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) 1180 ReplaceReg = NewReg; 1181 if (ReplaceReg) { 1182 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); 1183 UseOp.setReg(ReplaceReg); 1184 } 1185 } 1186 } 1187 1188 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { 1189 if (!Phi.isPHI()) 1190 return false; 1191 unsigned DefCycle = Schedule.getCycle(&Phi); 1192 int DefStage = Schedule.getStage(&Phi); 1193 1194 unsigned InitVal = 0; 1195 unsigned LoopVal = 0; 1196 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal); 1197 MachineInstr *Use = MRI.getVRegDef(LoopVal); 1198 if (!Use || Use->isPHI()) 1199 return true; 1200 unsigned LoopCycle = Schedule.getCycle(Use); 1201 int LoopStage = Schedule.getStage(Use); 1202 return (LoopCycle > DefCycle) || (LoopStage <= DefStage); 1203 } 1204 1205 //===----------------------------------------------------------------------===// 1206 // PeelingModuloScheduleExpander implementation 1207 //===----------------------------------------------------------------------===// 1208 // This is a reimplementation of ModuloScheduleExpander that works by creating 1209 // a fully correct steady-state kernel and peeling off the prolog and epilogs. 1210 //===----------------------------------------------------------------------===// 1211 1212 namespace { 1213 // Remove any dead phis in MBB. Dead phis either have only one block as input 1214 // (in which case they are the identity) or have no uses. 
1215 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, 1216 LiveIntervals *LIS) { 1217 bool Changed = true; 1218 while (Changed) { 1219 Changed = false; 1220 for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) { 1221 MachineInstr &MI = *I++; 1222 assert(MI.isPHI()); 1223 if (MRI.use_empty(MI.getOperand(0).getReg())) { 1224 if (LIS) 1225 LIS->RemoveMachineInstrFromMaps(MI); 1226 MI.eraseFromParent(); 1227 Changed = true; 1228 } else if (MI.getNumExplicitOperands() == 3) { 1229 MRI.constrainRegClass(MI.getOperand(1).getReg(), 1230 MRI.getRegClass(MI.getOperand(0).getReg())); 1231 MRI.replaceRegWith(MI.getOperand(0).getReg(), 1232 MI.getOperand(1).getReg()); 1233 if (LIS) 1234 LIS->RemoveMachineInstrFromMaps(MI); 1235 MI.eraseFromParent(); 1236 Changed = true; 1237 } 1238 } 1239 } 1240 } 1241 1242 /// Rewrites the kernel block in-place to adhere to the given schedule. 1243 /// KernelRewriter holds all of the state required to perform the rewriting. 1244 class KernelRewriter { 1245 ModuloSchedule &S; 1246 MachineBasicBlock *BB; 1247 MachineBasicBlock *PreheaderBB, *ExitBB; 1248 MachineRegisterInfo &MRI; 1249 const TargetInstrInfo *TII; 1250 LiveIntervals *LIS; 1251 1252 // Map from register class to canonical undef register for that class. 1253 DenseMap<const TargetRegisterClass *, Register> Undefs; 1254 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that 1255 // this map is only used when InitReg is non-undef. 1256 DenseMap<std::pair<unsigned, unsigned>, Register> Phis; 1257 // Map from LoopReg to phi register where the InitReg is undef. 1258 DenseMap<Register, Register> UndefPhis; 1259 1260 // Reg is used by MI. Return the new register MI should use to adhere to the 1261 // schedule. Insert phis as necessary. 1262 Register remapUse(Register Reg, MachineInstr &MI); 1263 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise. 1264 // If InitReg is not given it is chosen arbitrarily. 
It will either be undef 1265 // or will be chosen so as to share another phi. 1266 Register phi(Register LoopReg, Optional<Register> InitReg = {}, 1267 const TargetRegisterClass *RC = nullptr); 1268 // Create an undef register of the given register class. 1269 Register undef(const TargetRegisterClass *RC); 1270 1271 public: 1272 KernelRewriter(MachineLoop &L, ModuloSchedule &S, 1273 LiveIntervals *LIS = nullptr); 1274 void rewrite(); 1275 }; 1276 } // namespace 1277 1278 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S, 1279 LiveIntervals *LIS) 1280 : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()), 1281 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()), 1282 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) { 1283 PreheaderBB = *BB->pred_begin(); 1284 if (PreheaderBB == BB) 1285 PreheaderBB = *std::next(BB->pred_begin()); 1286 } 1287 1288 void KernelRewriter::rewrite() { 1289 // Rearrange the loop to be in schedule order. Note that the schedule may 1290 // contain instructions that are not owned by the loop block (InstrChanges and 1291 // friends), so we gracefully handle unowned instructions and delete any 1292 // instructions that weren't in the schedule. 1293 auto InsertPt = BB->getFirstTerminator(); 1294 MachineInstr *FirstMI = nullptr; 1295 for (MachineInstr *MI : S.getInstructions()) { 1296 if (MI->isPHI()) 1297 continue; 1298 if (MI->getParent()) 1299 MI->removeFromParent(); 1300 BB->insert(InsertPt, MI); 1301 if (!FirstMI) 1302 FirstMI = MI; 1303 } 1304 assert(FirstMI && "Failed to find first MI in schedule"); 1305 1306 // At this point all of the scheduled instructions are between FirstMI 1307 // and the end of the block. Kill from the first non-phi to FirstMI. 1308 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) { 1309 if (LIS) 1310 LIS->RemoveMachineInstrFromMaps(*I); 1311 (I++)->eraseFromParent(); 1312 } 1313 1314 // Now remap every instruction in the loop. 
1315 for (MachineInstr &MI : *BB) { 1316 if (MI.isPHI()) 1317 continue; 1318 for (MachineOperand &MO : MI.uses()) { 1319 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit()) 1320 continue; 1321 Register Reg = remapUse(MO.getReg(), MI); 1322 MO.setReg(Reg); 1323 } 1324 } 1325 EliminateDeadPhis(BB, MRI, LIS); 1326 1327 // Ensure a phi exists for all instructions that are either referenced by 1328 // an illegal phi or by an instruction outside the loop. This allows us to 1329 // treat remaps of these values the same as "normal" values that come from 1330 // loop-carried phis. 1331 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) { 1332 if (MI->isPHI()) { 1333 Register R = MI->getOperand(0).getReg(); 1334 phi(R); 1335 continue; 1336 } 1337 1338 for (MachineOperand &Def : MI->defs()) { 1339 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) { 1340 if (MI.getParent() != BB) { 1341 phi(Def.getReg()); 1342 break; 1343 } 1344 } 1345 } 1346 } 1347 } 1348 1349 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { 1350 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg); 1351 if (!Producer) 1352 return Reg; 1353 1354 int ConsumerStage = S.getStage(&MI); 1355 if (!Producer->isPHI()) { 1356 // Non-phi producers are simple to remap. Insert as many phis as the 1357 // difference between the consumer and producer stages. 1358 if (Producer->getParent() != BB) 1359 // Producer was not inside the loop. Use the register as-is. 1360 return Reg; 1361 int ProducerStage = S.getStage(Producer); 1362 assert(ConsumerStage != -1 && 1363 "In-loop consumer should always be scheduled!"); 1364 assert(ConsumerStage >= ProducerStage); 1365 unsigned StageDiff = ConsumerStage - ProducerStage; 1366 1367 for (unsigned I = 0; I < StageDiff; ++I) 1368 Reg = phi(Reg); 1369 return Reg; 1370 } 1371 1372 // First, dive through the phi chain to find the defaults for the generated 1373 // phis. 
1374 SmallVector<Optional<Register>, 4> Defaults; 1375 Register LoopReg = Reg; 1376 auto LoopProducer = Producer; 1377 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) { 1378 LoopReg = getLoopPhiReg(*LoopProducer, BB); 1379 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB)); 1380 LoopProducer = MRI.getUniqueVRegDef(LoopReg); 1381 assert(LoopProducer); 1382 } 1383 int LoopProducerStage = S.getStage(LoopProducer); 1384 1385 Optional<Register> IllegalPhiDefault; 1386 1387 if (LoopProducerStage == -1) { 1388 // Do nothing. 1389 } else if (LoopProducerStage > ConsumerStage) { 1390 // This schedule is only representable if ProducerStage == ConsumerStage+1. 1391 // In addition, Consumer's cycle must be scheduled after Producer in the 1392 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP 1393 // functions. 1394 #ifndef NDEBUG // Silence unused variables in non-asserts mode. 1395 int LoopProducerCycle = S.getCycle(LoopProducer); 1396 int ConsumerCycle = S.getCycle(&MI); 1397 #endif 1398 assert(LoopProducerCycle <= ConsumerCycle); 1399 assert(LoopProducerStage == ConsumerStage + 1); 1400 // Peel off the first phi from Defaults and insert a phi between producer 1401 // and consumer. This phi will not be at the front of the block so we 1402 // consider it illegal. It will only exist during the rewrite process; it 1403 // needs to exist while we peel off prologs because these could take the 1404 // default value. After that we can replace all uses with the loop producer 1405 // value. 
1406 IllegalPhiDefault = Defaults.front(); 1407 Defaults.erase(Defaults.begin()); 1408 } else { 1409 assert(ConsumerStage >= LoopProducerStage); 1410 int StageDiff = ConsumerStage - LoopProducerStage; 1411 if (StageDiff > 0) { 1412 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size() 1413 << " to " << (Defaults.size() + StageDiff) << "\n"); 1414 // If we need more phis than we have defaults for, pad out with undefs for 1415 // the earliest phis, which are at the end of the defaults chain (the 1416 // chain is in reverse order). 1417 Defaults.resize(Defaults.size() + StageDiff, Defaults.empty() 1418 ? Optional<Register>() 1419 : Defaults.back()); 1420 } 1421 } 1422 1423 // Now we know the number of stages to jump back, insert the phi chain. 1424 auto DefaultI = Defaults.rbegin(); 1425 while (DefaultI != Defaults.rend()) 1426 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg)); 1427 1428 if (IllegalPhiDefault.hasValue()) { 1429 // The consumer optionally consumes LoopProducer in the same iteration 1430 // (because the producer is scheduled at an earlier cycle than the consumer) 1431 // or the initial value. To facilitate this we create an illegal block here 1432 // by embedding a phi in the middle of the block. We will fix this up 1433 // immediately prior to pruning. 1434 auto RC = MRI.getRegClass(Reg); 1435 Register R = MRI.createVirtualRegister(RC); 1436 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) 1437 .addReg(IllegalPhiDefault.getValue()) 1438 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. 1439 .addReg(LoopReg) 1440 .addMBB(BB); // Block choice is arbitrary and has no effect. 1441 return R; 1442 } 1443 1444 return LoopReg; 1445 } 1446 1447 Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, 1448 const TargetRegisterClass *RC) { 1449 // If the init register is not undef, try and find an existing phi. 
1450 if (InitReg.hasValue()) { 1451 auto I = Phis.find({LoopReg, InitReg.getValue()}); 1452 if (I != Phis.end()) 1453 return I->second; 1454 } else { 1455 for (auto &KV : Phis) { 1456 if (KV.first.first == LoopReg) 1457 return KV.second; 1458 } 1459 } 1460 1461 // InitReg is either undef or no existing phi takes InitReg as input. Try and 1462 // find a phi that takes undef as input. 1463 auto I = UndefPhis.find(LoopReg); 1464 if (I != UndefPhis.end()) { 1465 Register R = I->second; 1466 if (!InitReg.hasValue()) 1467 // Found a phi taking undef as input, and this input is undef so return 1468 // without any more changes. 1469 return R; 1470 // Found a phi taking undef as input, so rewrite it to take InitReg. 1471 MachineInstr *MI = MRI.getVRegDef(R); 1472 MI->getOperand(1).setReg(InitReg.getValue()); 1473 Phis.insert({{LoopReg, InitReg.getValue()}, R}); 1474 MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); 1475 UndefPhis.erase(I); 1476 return R; 1477 } 1478 1479 // Failed to find any existing phi to reuse, so create a new one. 1480 if (!RC) 1481 RC = MRI.getRegClass(LoopReg); 1482 Register R = MRI.createVirtualRegister(RC); 1483 if (InitReg.hasValue()) 1484 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); 1485 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R) 1486 .addReg(InitReg.hasValue() ? *InitReg : undef(RC)) 1487 .addMBB(PreheaderBB) 1488 .addReg(LoopReg) 1489 .addMBB(BB); 1490 if (!InitReg.hasValue()) 1491 UndefPhis[LoopReg] = R; 1492 else 1493 Phis[{LoopReg, *InitReg}] = R; 1494 return R; 1495 } 1496 1497 Register KernelRewriter::undef(const TargetRegisterClass *RC) { 1498 Register &R = Undefs[RC]; 1499 if (R == 0) { 1500 // Create an IMPLICIT_DEF that defines this register if we need it. 1501 // All uses of this should be removed by the time we have finished unrolling 1502 // prologs and epilogs. 
1503 R = MRI.createVirtualRegister(RC); 1504 auto *InsertBB = &PreheaderBB->getParent()->front(); 1505 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(), 1506 TII->get(TargetOpcode::IMPLICIT_DEF), R); 1507 } 1508 return R; 1509 } 1510 1511 namespace { 1512 /// Describes an operand in the kernel of a pipelined loop. Characteristics of 1513 /// the operand are discovered, such as how many in-loop PHIs it has to jump 1514 /// through and defaults for these phis. 1515 class KernelOperandInfo { 1516 MachineBasicBlock *BB; 1517 MachineRegisterInfo &MRI; 1518 SmallVector<Register, 4> PhiDefaults; 1519 MachineOperand *Source; 1520 MachineOperand *Target; 1521 1522 public: 1523 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI, 1524 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis) 1525 : MRI(MRI) { 1526 Source = MO; 1527 BB = MO->getParent()->getParent(); 1528 while (isRegInLoop(MO)) { 1529 MachineInstr *MI = MRI.getVRegDef(MO->getReg()); 1530 if (MI->isFullCopy()) { 1531 MO = &MI->getOperand(1); 1532 continue; 1533 } 1534 if (!MI->isPHI()) 1535 break; 1536 // If this is an illegal phi, don't count it in distance. 1537 if (IllegalPhis.count(MI)) { 1538 MO = &MI->getOperand(3); 1539 continue; 1540 } 1541 1542 Register Default = getInitPhiReg(*MI, BB); 1543 MO = MI->getOperand(2).getMBB() == BB ? 
&MI->getOperand(1) 1544 : &MI->getOperand(3); 1545 PhiDefaults.push_back(Default); 1546 } 1547 Target = MO; 1548 } 1549 1550 bool operator==(const KernelOperandInfo &Other) const { 1551 return PhiDefaults.size() == Other.PhiDefaults.size(); 1552 } 1553 1554 void print(raw_ostream &OS) const { 1555 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in " 1556 << *Source->getParent(); 1557 } 1558 1559 private: 1560 bool isRegInLoop(MachineOperand *MO) { 1561 return MO->isReg() && MO->getReg().isVirtual() && 1562 MRI.getVRegDef(MO->getReg())->getParent() == BB; 1563 } 1564 }; 1565 } // namespace 1566 1567 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { 1568 BB = Schedule.getLoop()->getTopBlock(); 1569 Preheader = Schedule.getLoop()->getLoopPreheader(); 1570 1571 // Dump the schedule before we invalidate and remap all its instructions. 1572 // Stash it in a string so we can print it if we found an error. 1573 std::string ScheduleDump; 1574 raw_string_ostream OS(ScheduleDump); 1575 Schedule.print(OS); 1576 OS.flush(); 1577 1578 // First, run the normal ModuleScheduleExpander. We don't support any 1579 // InstrChanges. 1580 assert(LIS && "Requires LiveIntervals!"); 1581 ModuloScheduleExpander MSE(MF, Schedule, *LIS, 1582 ModuloScheduleExpander::InstrChangesTy()); 1583 MSE.expand(); 1584 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel(); 1585 if (!ExpandedKernel) { 1586 // The expander optimized away the kernel. We can't do any useful checking. 1587 MSE.cleanup(); 1588 return; 1589 } 1590 // Before running the KernelRewriter, re-add BB into the CFG. 1591 Preheader->addSuccessor(BB); 1592 1593 // Now run the new expansion algorithm. 1594 KernelRewriter KR(*Schedule.getLoop(), Schedule); 1595 KR.rewrite(); 1596 1597 // Collect all illegal phis that the new algorithm created. We'll give these 1598 // to KernelOperandInfo. 
1599 SmallPtrSet<MachineInstr *, 4> IllegalPhis; 1600 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) { 1601 if (NI->isPHI()) 1602 IllegalPhis.insert(&*NI); 1603 } 1604 1605 // Co-iterate across both kernels. We expect them to be identical apart from 1606 // phis and full COPYs (we look through both). 1607 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs; 1608 auto OI = ExpandedKernel->begin(); 1609 auto NI = BB->begin(); 1610 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) { 1611 while (OI->isPHI() || OI->isFullCopy()) 1612 ++OI; 1613 while (NI->isPHI() || NI->isFullCopy()) 1614 ++NI; 1615 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!"); 1616 // Analyze every operand separately. 1617 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin(); 1618 OOpI != OI->operands_end(); ++OOpI, ++NOpI) 1619 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis), 1620 KernelOperandInfo(&*NOpI, MRI, IllegalPhis)); 1621 } 1622 1623 bool Failed = false; 1624 for (auto &OldAndNew : KOIs) { 1625 if (OldAndNew.first == OldAndNew.second) 1626 continue; 1627 Failed = true; 1628 errs() << "Modulo kernel validation error: [\n"; 1629 errs() << " [golden] "; 1630 OldAndNew.first.print(errs()); 1631 errs() << " "; 1632 OldAndNew.second.print(errs()); 1633 errs() << "]\n"; 1634 } 1635 1636 if (Failed) { 1637 errs() << "Golden reference kernel:\n"; 1638 ExpandedKernel->print(errs()); 1639 errs() << "New kernel:\n"; 1640 BB->print(errs()); 1641 errs() << ScheduleDump; 1642 report_fatal_error( 1643 "Modulo kernel validation (-pipeliner-experimental-cg) failed"); 1644 } 1645 1646 // Cleanup by removing BB from the CFG again as the original 1647 // ModuloScheduleExpander intended. 
1648 Preheader->removeSuccessor(BB); 1649 MSE.cleanup(); 1650 } 1651 1652 //===----------------------------------------------------------------------===// 1653 // ModuloScheduleTestPass implementation 1654 //===----------------------------------------------------------------------===// 1655 // This pass constructs a ModuloSchedule from its module and runs 1656 // ModuloScheduleExpander. 1657 // 1658 // The module is expected to contain a single-block analyzable loop. 1659 // The total order of instructions is taken from the loop as-is. 1660 // Instructions are expected to be annotated with a PostInstrSymbol. 1661 // This PostInstrSymbol must have the following format: 1662 // "Stage=%d Cycle=%d". 1663 //===----------------------------------------------------------------------===// 1664 1665 namespace { 1666 class ModuloScheduleTest : public MachineFunctionPass { 1667 public: 1668 static char ID; 1669 1670 ModuloScheduleTest() : MachineFunctionPass(ID) { 1671 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry()); 1672 } 1673 1674 bool runOnMachineFunction(MachineFunction &MF) override; 1675 void runOnLoop(MachineFunction &MF, MachineLoop &L); 1676 1677 void getAnalysisUsage(AnalysisUsage &AU) const override { 1678 AU.addRequired<MachineLoopInfo>(); 1679 AU.addRequired<LiveIntervals>(); 1680 MachineFunctionPass::getAnalysisUsage(AU); 1681 } 1682 }; 1683 } // namespace 1684 1685 char ModuloScheduleTest::ID = 0; 1686 1687 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", 1688 "Modulo Schedule test pass", false, false) 1689 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1690 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 1691 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test", 1692 "Modulo Schedule test pass", false, false) 1693 1694 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) { 1695 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 1696 for (auto *L : MLI) { 1697 if (L->getTopBlock() != L->getBottomBlock()) 
1698 continue; 1699 runOnLoop(MF, *L); 1700 return false; 1701 } 1702 return false; 1703 } 1704 1705 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) { 1706 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_"); 1707 std::pair<StringRef, StringRef> StageTokenAndValue = 1708 getToken(StageAndCycle.first, "-"); 1709 std::pair<StringRef, StringRef> CycleTokenAndValue = 1710 getToken(StageAndCycle.second, "-"); 1711 if (StageTokenAndValue.first != "Stage" || 1712 CycleTokenAndValue.first != "_Cycle") { 1713 llvm_unreachable( 1714 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest"); 1715 return; 1716 } 1717 1718 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage); 1719 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle); 1720 1721 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n"; 1722 } 1723 1724 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) { 1725 LiveIntervals &LIS = getAnalysis<LiveIntervals>(); 1726 MachineBasicBlock *BB = L.getTopBlock(); 1727 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n"; 1728 1729 DenseMap<MachineInstr *, int> Cycle, Stage; 1730 std::vector<MachineInstr *> Instrs; 1731 for (MachineInstr &MI : *BB) { 1732 if (MI.isTerminator()) 1733 continue; 1734 Instrs.push_back(&MI); 1735 if (MCSymbol *Sym = MI.getPostInstrSymbol()) { 1736 dbgs() << "Parsing post-instr symbol for " << MI; 1737 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]); 1738 } 1739 } 1740 1741 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle), 1742 std::move(Stage)); 1743 ModuloScheduleExpander MSE( 1744 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy()); 1745 MSE.expand(); 1746 MSE.cleanup(); 1747 } 1748 1749 //===----------------------------------------------------------------------===// 1750 // ModuloScheduleTestAnnotater implementation 1751 
//===----------------------------------------------------------------------===// 1752 1753 void ModuloScheduleTestAnnotater::annotate() { 1754 for (MachineInstr *MI : S.getInstructions()) { 1755 SmallVector<char, 16> SV; 1756 raw_svector_ostream OS(SV); 1757 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI); 1758 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str()); 1759 MI->setPostInstrSymbol(MF, Sym); 1760 } 1761 } 1762