1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This is the LLVM vectorization plan. It represents a candidate for 12 /// vectorization, allowing to plan and optimize how to vectorize a given loop 13 /// before generating LLVM-IR. 14 /// The vectorizer uses vectorization plans to estimate the costs of potential 15 /// candidates and if profitable to execute the desired plan, generating vector 16 /// LLVM-IR code. 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #include "VPlan.h" 21 #include "llvm/ADT/PostOrderIterator.h" 22 #include "llvm/Analysis/LoopInfo.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/Dominators.h" 25 #include "llvm/Support/GraphWriter.h" 26 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "vplan" 31 32 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 33 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 34 const VPBlockBase *Block = this; 35 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 36 Block = Region->getEntry(); 37 return cast<VPBasicBlock>(Block); 38 } 39 40 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 41 VPBlockBase *Block = this; 42 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 43 Block = Region->getEntry(); 44 return cast<VPBasicBlock>(Block); 45 } 46 47 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 48 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { 49 const VPBlockBase *Block = this; 50 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 51 Block = Region->getExit(); 52 return cast<VPBasicBlock>(Block); 53 } 54 55 VPBasicBlock *VPBlockBase::getExitBasicBlock() { 56 VPBlockBase *Block = this; 57 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 58 Block = Region->getExit(); 59 return cast<VPBasicBlock>(Block); 60 } 61 62 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 63 if (!Successors.empty() || !Parent) 64 return this; 65 assert(Parent->getExit() == this && 66 "Block w/o successors not the exit of its parent."); 67 return Parent->getEnclosingBlockWithSuccessors(); 68 } 69 70 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 71 if (!Predecessors.empty() || !Parent) 72 return this; 73 assert(Parent->getEntry() == this && 74 "Block w/o predecessors not the entry of its parent."); 75 return Parent->getEnclosingBlockWithPredecessors(); 76 } 77 78 void VPBlockBase::deleteCFG(VPBlockBase *Entry) { 79 SmallVector<VPBlockBase *, 8> Blocks; 80 for (VPBlockBase *Block : depth_first(Entry)) 81 Blocks.push_back(Block); 82 83 for (VPBlockBase *Block : Blocks) 84 delete Block; 85 } 86 87 BasicBlock * 88 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { 89 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 90 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 91 BasicBlock *PrevBB = CFG.PrevBB; 92 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 93 PrevBB->getParent(), CFG.LastBB); 94 DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 95 96 // Hook up the new basic block to its predecessors. 97 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 98 VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); 99 auto &PredVPSuccessors = PredVPBB->getSuccessors(); 100 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 101 assert(PredBB && "Predecessor basic-block not found building successor."); 102 auto *PredBBTerminator = PredBB->getTerminator(); 103 DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 104 if (isa<UnreachableInst>(PredBBTerminator)) { 105 assert(PredVPSuccessors.size() == 1 && 106 "Predecessor ending w/o branch must have single successor."); 107 PredBBTerminator->eraseFromParent(); 108 BranchInst::Create(NewBB, PredBB); 109 } else { 110 assert(PredVPSuccessors.size() == 2 && 111 "Predecessor ending with branch must have two successors."); 112 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 113 assert(!PredBBTerminator->getSuccessor(idx) && 114 "Trying to reset an existing successor block."); 115 PredBBTerminator->setSuccessor(idx, NewBB); 116 } 117 } 118 return NewBB; 119 } 120 121 void VPBasicBlock::execute(VPTransformState *State) { 122 bool Replica = State->Instance && 123 !(State->Instance->Part == 0 && State->Instance->Lane == 0); 124 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; 125 VPBlockBase *SingleHPred = nullptr; 126 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 127 128 // 1. Create an IR basic block, or reuse the last one if possible. 129 // The last IR basic block is reused, as an optimization, in three cases: 130 // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; 131 // B. when the current VPBB has a single (hierarchical) predecessor which 132 // is PrevVPBB and the latter has a single (hierarchical) successor; and 133 // C. when the current VPBB is an entry of a region replica - where PrevVPBB 134 // is the exit of this region from a previous instance, or the predecessor 135 // of this region. 136 if (PrevVPBB && /* A */ 137 !((SingleHPred = getSingleHierarchicalPredecessor()) && 138 SingleHPred->getExitBasicBlock() == PrevVPBB && 139 PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ 140 !(Replica && getPredecessors().empty())) { /* C */ 141 142 NewBB = createEmptyBasicBlock(State->CFG); 143 State->Builder.SetInsertPoint(NewBB); 144 // Temporarily terminate with unreachable until CFG is rewired. 145 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 146 State->Builder.SetInsertPoint(Terminator); 147 // Register NewBB in its loop. In innermost loops its the same for all BB's. 148 Loop *L = State->LI->getLoopFor(State->CFG.LastBB); 149 L->addBasicBlockToLoop(NewBB, *State->LI); 150 State->CFG.PrevBB = NewBB; 151 } 152 153 // 2. Fill the IR basic block with IR instructions. 154 DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() 155 << " in BB:" << NewBB->getName() << '\n'); 156 157 State->CFG.VPBB2IRBB[this] = NewBB; 158 State->CFG.PrevVPBB = this; 159 160 for (VPRecipeBase &Recipe : Recipes) 161 Recipe.execute(*State); 162 163 DEBUG(dbgs() << "LV: filled BB:" << *NewBB); 164 } 165 166 void VPRegionBlock::execute(VPTransformState *State) { 167 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); 168 169 if (!isReplicator()) { 170 // Visit the VPBlocks connected to "this", starting from it. 171 for (VPBlockBase *Block : RPOT) { 172 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 173 Block->execute(State); 174 } 175 return; 176 } 177 178 assert(!State->Instance && "Replicating a Region with non-null instance."); 179 180 // Enter replicating mode. 181 State->Instance = {0, 0}; 182 183 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { 184 State->Instance->Part = Part; 185 for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { 186 State->Instance->Lane = Lane; 187 // Visit the VPBlocks connected to \p this, starting from it. 188 for (VPBlockBase *Block : RPOT) { 189 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 190 Block->execute(State); 191 } 192 } 193 } 194 195 // Exit replicating mode. 196 State->Instance.reset(); 197 } 198 199 /// Generate the code inside the body of the vectorized loop. Assumes a single 200 /// LoopVectorBody basic-block was created for this. Introduce additional 201 /// basic-blocks as needed, and fill them all. 202 void VPlan::execute(VPTransformState *State) { 203 BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; 204 BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); 205 assert(VectorHeaderBB && "Loop preheader does not have a single successor."); 206 BasicBlock *VectorLatchBB = VectorHeaderBB; 207 208 // 1. Make room to generate basic-blocks inside loop body if needed. 209 VectorLatchBB = VectorHeaderBB->splitBasicBlock( 210 VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); 211 Loop *L = State->LI->getLoopFor(VectorHeaderBB); 212 L->addBasicBlockToLoop(VectorLatchBB, *State->LI); 213 // Remove the edge between Header and Latch to allow other connections. 214 // Temporarily terminate with unreachable until CFG is rewired. 215 // Note: this asserts the generated code's assumption that 216 // getFirstInsertionPt() can be dereferenced into an Instruction. 217 VectorHeaderBB->getTerminator()->eraseFromParent(); 218 State->Builder.SetInsertPoint(VectorHeaderBB); 219 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 220 State->Builder.SetInsertPoint(Terminator); 221 222 // 2. Generate code in loop body. 223 State->CFG.PrevVPBB = nullptr; 224 State->CFG.PrevBB = VectorHeaderBB; 225 State->CFG.LastBB = VectorLatchBB; 226 227 for (VPBlockBase *Block : depth_first(Entry)) 228 Block->execute(State); 229 230 // 3. Merge the temporary latch created with the last basic-block filled. 231 BasicBlock *LastBB = State->CFG.PrevBB; 232 // Connect LastBB to VectorLatchBB to facilitate their merge. 233 assert(isa<UnreachableInst>(LastBB->getTerminator()) && 234 "Expected VPlan CFG to terminate with unreachable"); 235 LastBB->getTerminator()->eraseFromParent(); 236 BranchInst::Create(VectorLatchBB, LastBB); 237 238 // Merge LastBB with Latch. 239 bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); 240 (void)Merged; 241 assert(Merged && "Could not merge last basic block with latch."); 242 VectorLatchBB = LastBB; 243 244 updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); 245 } 246 247 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, 248 BasicBlock *LoopLatchBB) { 249 BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); 250 assert(LoopHeaderBB && "Loop preheader does not have a single successor."); 251 DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); 252 // The vector body may be more than a single basic-block by this point. 253 // Update the dominator tree information inside the vector body by propagating 254 // it from header to latch, expecting only triangular control-flow, if any. 255 BasicBlock *PostDomSucc = nullptr; 256 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { 257 // Get the list of successors of this block. 258 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); 259 assert(Succs.size() <= 2 && 260 "Basic block in vector loop has more than 2 successors."); 261 PostDomSucc = Succs[0]; 262 if (Succs.size() == 1) { 263 assert(PostDomSucc->getSinglePredecessor() && 264 "PostDom successor has more than one predecessor."); 265 DT->addNewBlock(PostDomSucc, BB); 266 continue; 267 } 268 BasicBlock *InterimSucc = Succs[1]; 269 if (PostDomSucc->getSingleSuccessor() == InterimSucc) { 270 PostDomSucc = Succs[1]; 271 InterimSucc = Succs[0]; 272 } 273 assert(InterimSucc->getSingleSuccessor() == PostDomSucc && 274 "One successor of a basic block does not lead to the other."); 275 assert(InterimSucc->getSinglePredecessor() && 276 "Interim successor has more than one predecessor."); 277 assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 && 278 "PostDom successor has more than two predecessors."); 279 DT->addNewBlock(InterimSucc, BB); 280 DT->addNewBlock(PostDomSucc, BB); 281 } 282 } 283 284 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 285 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 286 Twine(getOrCreateBID(Block)); 287 } 288 289 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 290 const std::string &Name = Block->getName(); 291 if (!Name.empty()) 292 return Name; 293 return "VPB" + Twine(getOrCreateBID(Block)); 294 } 295 296 void VPlanPrinter::dump() { 297 Depth = 1; 298 bumpIndent(0); 299 OS << "digraph VPlan {\n"; 300 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 301 if (!Plan.getName().empty()) 302 OS << "\\n" << DOT::EscapeString(Plan.getName()); 303 OS << "\"]\n"; 304 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 305 OS << "edge [fontname=Courier, fontsize=30]\n"; 306 OS << "compound=true\n"; 307 308 for (VPBlockBase *Block : depth_first(Plan.getEntry())) 309 dumpBlock(Block); 310 311 OS << "}\n"; 312 } 313 314 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 315 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 316 dumpBasicBlock(BasicBlock); 317 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 318 dumpRegion(Region); 319 else 320 llvm_unreachable("Unsupported kind of VPBlock."); 321 } 322 323 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 324 bool Hidden, const Twine &Label) { 325 // Due to "dot" we print an edge between two regions as an edge between the 326 // exit basic block and the entry basic of the respective regions. 327 const VPBlockBase *Tail = From->getExitBasicBlock(); 328 const VPBlockBase *Head = To->getEntryBasicBlock(); 329 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 330 OS << " [ label=\"" << Label << '\"'; 331 if (Tail != From) 332 OS << " ltail=" << getUID(From); 333 if (Head != To) 334 OS << " lhead=" << getUID(To); 335 if (Hidden) 336 OS << "; splines=none"; 337 OS << "]\n"; 338 } 339 340 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 341 auto &Successors = Block->getSuccessors(); 342 if (Successors.size() == 1) 343 drawEdge(Block, Successors.front(), false, ""); 344 else if (Successors.size() == 2) { 345 drawEdge(Block, Successors.front(), false, "T"); 346 drawEdge(Block, Successors.back(), false, "F"); 347 } else { 348 unsigned SuccessorNumber = 0; 349 for (auto *Successor : Successors) 350 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 351 } 352 } 353 354 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 355 OS << Indent << getUID(BasicBlock) << " [label =\n"; 356 bumpIndent(1); 357 OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; 358 bumpIndent(1); 359 for (const VPRecipeBase &Recipe : *BasicBlock) 360 Recipe.print(OS, Indent); 361 bumpIndent(-2); 362 OS << "\n" << Indent << "]\n"; 363 dumpEdges(BasicBlock); 364 } 365 366 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 367 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 368 bumpIndent(1); 369 OS << Indent << "fontname=Courier\n" 370 << Indent << "label=\"" 371 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 372 << DOT::EscapeString(Region->getName()) << "\"\n"; 373 // Dump the blocks of the region. 374 assert(Region->getEntry() && "Region contains no inner blocks."); 375 for (const VPBlockBase *Block : depth_first(Region->getEntry())) 376 dumpBlock(Block); 377 bumpIndent(-1); 378 OS << Indent << "}\n"; 379 dumpEdges(Region); 380 } 381 382 void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { 383 std::string IngredientString; 384 raw_string_ostream RSO(IngredientString); 385 if (auto *Inst = dyn_cast<Instruction>(V)) { 386 if (!Inst->getType()->isVoidTy()) { 387 Inst->printAsOperand(RSO, false); 388 RSO << " = "; 389 } 390 RSO << Inst->getOpcodeName() << " "; 391 unsigned E = Inst->getNumOperands(); 392 if (E > 0) { 393 Inst->getOperand(0)->printAsOperand(RSO, false); 394 for (unsigned I = 1; I < E; ++I) 395 Inst->getOperand(I)->printAsOperand(RSO << ", ", false); 396 } 397 } else // !Inst 398 V->printAsOperand(RSO, false); 399 RSO.flush(); 400 O << DOT::EscapeString(IngredientString); 401 } 402