1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This is the LLVM vectorization plan. It represents a candidate for 12 /// vectorization, allowing to plan and optimize how to vectorize a given loop 13 /// before generating LLVM-IR. 14 /// The vectorizer uses vectorization plans to estimate the costs of potential 15 /// candidates and if profitable to execute the desired plan, generating vector 16 /// LLVM-IR code. 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #include "VPlan.h" 21 #include "llvm/ADT/DepthFirstIterator.h" 22 #include "llvm/ADT/PostOrderIterator.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Twine.h" 25 #include "llvm/Analysis/LoopInfo.h" 26 #include "llvm/IR/BasicBlock.h" 27 #include "llvm/IR/CFG.h" 28 #include "llvm/IR/Dominators.h" 29 #include "llvm/IR/InstrTypes.h" 30 #include "llvm/IR/Instruction.h" 31 #include "llvm/IR/Instructions.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Value.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/GraphWriter.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 40 #include <cassert> 41 #include <iterator> 42 #include <string> 43 #include <vector> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "vplan" 48 49 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 50 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 51 const VPBlockBase *Block = this; 52 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 53 Block = Region->getEntry(); 54 return cast<VPBasicBlock>(Block); 55 } 56 57 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 58 VPBlockBase *Block = this; 59 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 60 Block = Region->getEntry(); 61 return cast<VPBasicBlock>(Block); 62 } 63 64 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 65 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { 66 const VPBlockBase *Block = this; 67 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 68 Block = Region->getExit(); 69 return cast<VPBasicBlock>(Block); 70 } 71 72 VPBasicBlock *VPBlockBase::getExitBasicBlock() { 73 VPBlockBase *Block = this; 74 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 75 Block = Region->getExit(); 76 return cast<VPBasicBlock>(Block); 77 } 78 79 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 80 if (!Successors.empty() || !Parent) 81 return this; 82 assert(Parent->getExit() == this && 83 "Block w/o successors not the exit of its parent."); 84 return Parent->getEnclosingBlockWithSuccessors(); 85 } 86 87 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 88 if (!Predecessors.empty() || !Parent) 89 return this; 90 assert(Parent->getEntry() == this && 91 "Block w/o predecessors not the entry of its parent."); 92 return Parent->getEnclosingBlockWithPredecessors(); 93 } 94 95 void VPBlockBase::deleteCFG(VPBlockBase *Entry) { 96 SmallVector<VPBlockBase *, 8> Blocks; 97 for (VPBlockBase *Block : depth_first(Entry)) 98 Blocks.push_back(Block); 99 100 for (VPBlockBase *Block : Blocks) 101 delete Block; 102 } 103 104 BasicBlock * 105 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { 106 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 107 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 108 BasicBlock *PrevBB = CFG.PrevBB; 109 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 110 PrevBB->getParent(), CFG.LastBB); 111 DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 112 113 // Hook up the new basic block to its predecessors. 114 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 115 VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); 116 auto &PredVPSuccessors = PredVPBB->getSuccessors(); 117 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 118 assert(PredBB && "Predecessor basic-block not found building successor."); 119 auto *PredBBTerminator = PredBB->getTerminator(); 120 DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 121 if (isa<UnreachableInst>(PredBBTerminator)) { 122 assert(PredVPSuccessors.size() == 1 && 123 "Predecessor ending w/o branch must have single successor."); 124 PredBBTerminator->eraseFromParent(); 125 BranchInst::Create(NewBB, PredBB); 126 } else { 127 assert(PredVPSuccessors.size() == 2 && 128 "Predecessor ending with branch must have two successors."); 129 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 130 assert(!PredBBTerminator->getSuccessor(idx) && 131 "Trying to reset an existing successor block."); 132 PredBBTerminator->setSuccessor(idx, NewBB); 133 } 134 } 135 return NewBB; 136 } 137 138 void VPBasicBlock::execute(VPTransformState *State) { 139 bool Replica = State->Instance && 140 !(State->Instance->Part == 0 && State->Instance->Lane == 0); 141 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; 142 VPBlockBase *SingleHPred = nullptr; 143 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 144 145 // 1. Create an IR basic block, or reuse the last one if possible. 146 // The last IR basic block is reused, as an optimization, in three cases: 147 // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; 148 // B. when the current VPBB has a single (hierarchical) predecessor which 149 // is PrevVPBB and the latter has a single (hierarchical) successor; and 150 // C. when the current VPBB is an entry of a region replica - where PrevVPBB 151 // is the exit of this region from a previous instance, or the predecessor 152 // of this region. 153 if (PrevVPBB && /* A */ 154 !((SingleHPred = getSingleHierarchicalPredecessor()) && 155 SingleHPred->getExitBasicBlock() == PrevVPBB && 156 PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ 157 !(Replica && getPredecessors().empty())) { /* C */ 158 NewBB = createEmptyBasicBlock(State->CFG); 159 State->Builder.SetInsertPoint(NewBB); 160 // Temporarily terminate with unreachable until CFG is rewired. 161 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 162 State->Builder.SetInsertPoint(Terminator); 163 // Register NewBB in its loop. In innermost loops its the same for all BB's. 164 Loop *L = State->LI->getLoopFor(State->CFG.LastBB); 165 L->addBasicBlockToLoop(NewBB, *State->LI); 166 State->CFG.PrevBB = NewBB; 167 } 168 169 // 2. Fill the IR basic block with IR instructions. 170 DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() 171 << " in BB:" << NewBB->getName() << '\n'); 172 173 State->CFG.VPBB2IRBB[this] = NewBB; 174 State->CFG.PrevVPBB = this; 175 176 for (VPRecipeBase &Recipe : Recipes) 177 Recipe.execute(*State); 178 179 DEBUG(dbgs() << "LV: filled BB:" << *NewBB); 180 } 181 182 void VPRegionBlock::execute(VPTransformState *State) { 183 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); 184 185 if (!isReplicator()) { 186 // Visit the VPBlocks connected to "this", starting from it. 187 for (VPBlockBase *Block : RPOT) { 188 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 189 Block->execute(State); 190 } 191 return; 192 } 193 194 assert(!State->Instance && "Replicating a Region with non-null instance."); 195 196 // Enter replicating mode. 197 State->Instance = {0, 0}; 198 199 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { 200 State->Instance->Part = Part; 201 for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { 202 State->Instance->Lane = Lane; 203 // Visit the VPBlocks connected to \p this, starting from it. 204 for (VPBlockBase *Block : RPOT) { 205 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 206 Block->execute(State); 207 } 208 } 209 } 210 211 // Exit replicating mode. 212 State->Instance.reset(); 213 } 214 215 /// Generate the code inside the body of the vectorized loop. Assumes a single 216 /// LoopVectorBody basic-block was created for this. Introduce additional 217 /// basic-blocks as needed, and fill them all. 218 void VPlan::execute(VPTransformState *State) { 219 BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; 220 BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); 221 assert(VectorHeaderBB && "Loop preheader does not have a single successor."); 222 BasicBlock *VectorLatchBB = VectorHeaderBB; 223 224 // 1. Make room to generate basic-blocks inside loop body if needed. 225 VectorLatchBB = VectorHeaderBB->splitBasicBlock( 226 VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); 227 Loop *L = State->LI->getLoopFor(VectorHeaderBB); 228 L->addBasicBlockToLoop(VectorLatchBB, *State->LI); 229 // Remove the edge between Header and Latch to allow other connections. 230 // Temporarily terminate with unreachable until CFG is rewired. 231 // Note: this asserts the generated code's assumption that 232 // getFirstInsertionPt() can be dereferenced into an Instruction. 233 VectorHeaderBB->getTerminator()->eraseFromParent(); 234 State->Builder.SetInsertPoint(VectorHeaderBB); 235 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 236 State->Builder.SetInsertPoint(Terminator); 237 238 // 2. Generate code in loop body. 239 State->CFG.PrevVPBB = nullptr; 240 State->CFG.PrevBB = VectorHeaderBB; 241 State->CFG.LastBB = VectorLatchBB; 242 243 for (VPBlockBase *Block : depth_first(Entry)) 244 Block->execute(State); 245 246 // 3. Merge the temporary latch created with the last basic-block filled. 247 BasicBlock *LastBB = State->CFG.PrevBB; 248 // Connect LastBB to VectorLatchBB to facilitate their merge. 249 assert(isa<UnreachableInst>(LastBB->getTerminator()) && 250 "Expected VPlan CFG to terminate with unreachable"); 251 LastBB->getTerminator()->eraseFromParent(); 252 BranchInst::Create(VectorLatchBB, LastBB); 253 254 // Merge LastBB with Latch. 255 bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); 256 (void)Merged; 257 assert(Merged && "Could not merge last basic block with latch."); 258 VectorLatchBB = LastBB; 259 260 updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); 261 } 262 263 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, 264 BasicBlock *LoopLatchBB) { 265 BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); 266 assert(LoopHeaderBB && "Loop preheader does not have a single successor."); 267 DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); 268 // The vector body may be more than a single basic-block by this point. 269 // Update the dominator tree information inside the vector body by propagating 270 // it from header to latch, expecting only triangular control-flow, if any. 271 BasicBlock *PostDomSucc = nullptr; 272 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { 273 // Get the list of successors of this block. 274 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); 275 assert(Succs.size() <= 2 && 276 "Basic block in vector loop has more than 2 successors."); 277 PostDomSucc = Succs[0]; 278 if (Succs.size() == 1) { 279 assert(PostDomSucc->getSinglePredecessor() && 280 "PostDom successor has more than one predecessor."); 281 DT->addNewBlock(PostDomSucc, BB); 282 continue; 283 } 284 BasicBlock *InterimSucc = Succs[1]; 285 if (PostDomSucc->getSingleSuccessor() == InterimSucc) { 286 PostDomSucc = Succs[1]; 287 InterimSucc = Succs[0]; 288 } 289 assert(InterimSucc->getSingleSuccessor() == PostDomSucc && 290 "One successor of a basic block does not lead to the other."); 291 assert(InterimSucc->getSinglePredecessor() && 292 "Interim successor has more than one predecessor."); 293 assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 && 294 "PostDom successor has more than two predecessors."); 295 DT->addNewBlock(InterimSucc, BB); 296 DT->addNewBlock(PostDomSucc, BB); 297 } 298 } 299 300 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 301 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 302 Twine(getOrCreateBID(Block)); 303 } 304 305 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 306 const std::string &Name = Block->getName(); 307 if (!Name.empty()) 308 return Name; 309 return "VPB" + Twine(getOrCreateBID(Block)); 310 } 311 312 void VPlanPrinter::dump() { 313 Depth = 1; 314 bumpIndent(0); 315 OS << "digraph VPlan {\n"; 316 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 317 if (!Plan.getName().empty()) 318 OS << "\\n" << DOT::EscapeString(Plan.getName()); 319 OS << "\"]\n"; 320 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 321 OS << "edge [fontname=Courier, fontsize=30]\n"; 322 OS << "compound=true\n"; 323 324 for (VPBlockBase *Block : depth_first(Plan.getEntry())) 325 dumpBlock(Block); 326 327 OS << "}\n"; 328 } 329 330 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 331 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 332 dumpBasicBlock(BasicBlock); 333 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 334 dumpRegion(Region); 335 else 336 llvm_unreachable("Unsupported kind of VPBlock."); 337 } 338 339 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 340 bool Hidden, const Twine &Label) { 341 // Due to "dot" we print an edge between two regions as an edge between the 342 // exit basic block and the entry basic of the respective regions. 343 const VPBlockBase *Tail = From->getExitBasicBlock(); 344 const VPBlockBase *Head = To->getEntryBasicBlock(); 345 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 346 OS << " [ label=\"" << Label << '\"'; 347 if (Tail != From) 348 OS << " ltail=" << getUID(From); 349 if (Head != To) 350 OS << " lhead=" << getUID(To); 351 if (Hidden) 352 OS << "; splines=none"; 353 OS << "]\n"; 354 } 355 356 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 357 auto &Successors = Block->getSuccessors(); 358 if (Successors.size() == 1) 359 drawEdge(Block, Successors.front(), false, ""); 360 else if (Successors.size() == 2) { 361 drawEdge(Block, Successors.front(), false, "T"); 362 drawEdge(Block, Successors.back(), false, "F"); 363 } else { 364 unsigned SuccessorNumber = 0; 365 for (auto *Successor : Successors) 366 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 367 } 368 } 369 370 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 371 OS << Indent << getUID(BasicBlock) << " [label =\n"; 372 bumpIndent(1); 373 OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; 374 bumpIndent(1); 375 for (const VPRecipeBase &Recipe : *BasicBlock) 376 Recipe.print(OS, Indent); 377 bumpIndent(-2); 378 OS << "\n" << Indent << "]\n"; 379 dumpEdges(BasicBlock); 380 } 381 382 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 383 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 384 bumpIndent(1); 385 OS << Indent << "fontname=Courier\n" 386 << Indent << "label=\"" 387 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 388 << DOT::EscapeString(Region->getName()) << "\"\n"; 389 // Dump the blocks of the region. 390 assert(Region->getEntry() && "Region contains no inner blocks."); 391 for (const VPBlockBase *Block : depth_first(Region->getEntry())) 392 dumpBlock(Block); 393 bumpIndent(-1); 394 OS << Indent << "}\n"; 395 dumpEdges(Region); 396 } 397 398 void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { 399 std::string IngredientString; 400 raw_string_ostream RSO(IngredientString); 401 if (auto *Inst = dyn_cast<Instruction>(V)) { 402 if (!Inst->getType()->isVoidTy()) { 403 Inst->printAsOperand(RSO, false); 404 RSO << " = "; 405 } 406 RSO << Inst->getOpcodeName() << " "; 407 unsigned E = Inst->getNumOperands(); 408 if (E > 0) { 409 Inst->getOperand(0)->printAsOperand(RSO, false); 410 for (unsigned I = 1; I < E; ++I) 411 Inst->getOperand(I)->printAsOperand(RSO << ", ", false); 412 } 413 } else // !Inst 414 V->printAsOperand(RSO, false); 415 RSO.flush(); 416 O << DOT::EscapeString(IngredientString); 417 } 418