1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This is the LLVM vectorization plan. It represents a candidate for
12 /// vectorization, allowing to plan and optimize how to vectorize a given loop
13 /// before generating LLVM-IR.
14 /// The vectorizer uses vectorization plans to estimate the costs of potential
15 /// candidates and if profitable to execute the desired plan, generating vector
16 /// LLVM-IR code.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #include "VPlan.h"
21 #include "llvm/ADT/PostOrderIterator.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/Support/GraphWriter.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "vplan"
31 
32 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
33 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
34   const VPBlockBase *Block = this;
35   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
36     Block = Region->getEntry();
37   return cast<VPBasicBlock>(Block);
38 }
39 
40 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
41   VPBlockBase *Block = this;
42   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
43     Block = Region->getEntry();
44   return cast<VPBasicBlock>(Block);
45 }
46 
47 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
48 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
49   const VPBlockBase *Block = this;
50   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
51     Block = Region->getExit();
52   return cast<VPBasicBlock>(Block);
53 }
54 
55 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
56   VPBlockBase *Block = this;
57   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
58     Block = Region->getExit();
59   return cast<VPBasicBlock>(Block);
60 }
61 
62 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
63   if (!Successors.empty() || !Parent)
64     return this;
65   assert(Parent->getExit() == this &&
66          "Block w/o successors not the exit of its parent.");
67   return Parent->getEnclosingBlockWithSuccessors();
68 }
69 
70 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
71   if (!Predecessors.empty() || !Parent)
72     return this;
73   assert(Parent->getEntry() == this &&
74          "Block w/o predecessors not the entry of its parent.");
75   return Parent->getEnclosingBlockWithPredecessors();
76 }
77 
78 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
79   SmallVector<VPBlockBase *, 8> Blocks;
80   for (VPBlockBase *Block : depth_first(Entry))
81     Blocks.push_back(Block);
82 
83   for (VPBlockBase *Block : Blocks)
84     delete Block;
85 }
86 
87 BasicBlock *
88 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
89   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
90   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
91   BasicBlock *PrevBB = CFG.PrevBB;
92   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
93                                          PrevBB->getParent(), CFG.LastBB);
94   DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
95 
96   // Hook up the new basic block to its predecessors.
97   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
98     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
99     auto &PredVPSuccessors = PredVPBB->getSuccessors();
100     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
101     assert(PredBB && "Predecessor basic-block not found building successor.");
102     auto *PredBBTerminator = PredBB->getTerminator();
103     DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
104     if (isa<UnreachableInst>(PredBBTerminator)) {
105       assert(PredVPSuccessors.size() == 1 &&
106              "Predecessor ending w/o branch must have single successor.");
107       PredBBTerminator->eraseFromParent();
108       BranchInst::Create(NewBB, PredBB);
109     } else {
110       assert(PredVPSuccessors.size() == 2 &&
111              "Predecessor ending with branch must have two successors.");
112       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
113       assert(!PredBBTerminator->getSuccessor(idx) &&
114              "Trying to reset an existing successor block.");
115       PredBBTerminator->setSuccessor(idx, NewBB);
116     }
117   }
118   return NewBB;
119 }
120 
121 void VPBasicBlock::execute(VPTransformState *State) {
122   bool Replica = State->Instance &&
123                  !(State->Instance->Part == 0 && State->Instance->Lane == 0);
124   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
125   VPBlockBase *SingleHPred = nullptr;
126   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
127 
128   // 1. Create an IR basic block, or reuse the last one if possible.
129   // The last IR basic block is reused, as an optimization, in three cases:
130   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
131   // B. when the current VPBB has a single (hierarchical) predecessor which
132   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
133   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
134   //    is the exit of this region from a previous instance, or the predecessor
135   //    of this region.
136   if (PrevVPBB && /* A */
137       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
138         SingleHPred->getExitBasicBlock() == PrevVPBB &&
139         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
140       !(Replica && getPredecessors().empty())) {       /* C */
141 
142     NewBB = createEmptyBasicBlock(State->CFG);
143     State->Builder.SetInsertPoint(NewBB);
144     // Temporarily terminate with unreachable until CFG is rewired.
145     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
146     State->Builder.SetInsertPoint(Terminator);
147     // Register NewBB in its loop. In innermost loops its the same for all BB's.
148     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
149     L->addBasicBlockToLoop(NewBB, *State->LI);
150     State->CFG.PrevBB = NewBB;
151   }
152 
153   // 2. Fill the IR basic block with IR instructions.
154   DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
155                << " in BB:" << NewBB->getName() << '\n');
156 
157   State->CFG.VPBB2IRBB[this] = NewBB;
158   State->CFG.PrevVPBB = this;
159 
160   for (VPRecipeBase &Recipe : Recipes)
161     Recipe.execute(*State);
162 
163   DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
164 }
165 
166 void VPRegionBlock::execute(VPTransformState *State) {
167   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
168 
169   if (!isReplicator()) {
170     // Visit the VPBlocks connected to "this", starting from it.
171     for (VPBlockBase *Block : RPOT) {
172       DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
173       Block->execute(State);
174     }
175     return;
176   }
177 
178   assert(!State->Instance && "Replicating a Region with non-null instance.");
179 
180   // Enter replicating mode.
181   State->Instance = {0, 0};
182 
183   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
184     State->Instance->Part = Part;
185     for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
186       State->Instance->Lane = Lane;
187       // Visit the VPBlocks connected to \p this, starting from it.
188       for (VPBlockBase *Block : RPOT) {
189         DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
190         Block->execute(State);
191       }
192     }
193   }
194 
195   // Exit replicating mode.
196   State->Instance.reset();
197 }
198 
199 /// Generate the code inside the body of the vectorized loop. Assumes a single
200 /// LoopVectorBody basic-block was created for this. Introduce additional
201 /// basic-blocks as needed, and fill them all.
202 void VPlan::execute(VPTransformState *State) {
203   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
204   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
205   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
206   BasicBlock *VectorLatchBB = VectorHeaderBB;
207 
208   // 1. Make room to generate basic-blocks inside loop body if needed.
209   VectorLatchBB = VectorHeaderBB->splitBasicBlock(
210       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
211   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
212   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
213   // Remove the edge between Header and Latch to allow other connections.
214   // Temporarily terminate with unreachable until CFG is rewired.
215   // Note: this asserts the generated code's assumption that
216   // getFirstInsertionPt() can be dereferenced into an Instruction.
217   VectorHeaderBB->getTerminator()->eraseFromParent();
218   State->Builder.SetInsertPoint(VectorHeaderBB);
219   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
220   State->Builder.SetInsertPoint(Terminator);
221 
222   // 2. Generate code in loop body.
223   State->CFG.PrevVPBB = nullptr;
224   State->CFG.PrevBB = VectorHeaderBB;
225   State->CFG.LastBB = VectorLatchBB;
226 
227   for (VPBlockBase *Block : depth_first(Entry))
228     Block->execute(State);
229 
230   // 3. Merge the temporary latch created with the last basic-block filled.
231   BasicBlock *LastBB = State->CFG.PrevBB;
232   // Connect LastBB to VectorLatchBB to facilitate their merge.
233   assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
234          "Expected VPlan CFG to terminate with unreachable");
235   LastBB->getTerminator()->eraseFromParent();
236   BranchInst::Create(VectorLatchBB, LastBB);
237 
238   // Merge LastBB with Latch.
239   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
240   (void)Merged;
241   assert(Merged && "Could not merge last basic block with latch.");
242   VectorLatchBB = LastBB;
243 
244   updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
245 }
246 
247 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
248                                 BasicBlock *LoopLatchBB) {
249   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
250   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
251   DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
252   // The vector body may be more than a single basic-block by this point.
253   // Update the dominator tree information inside the vector body by propagating
254   // it from header to latch, expecting only triangular control-flow, if any.
255   BasicBlock *PostDomSucc = nullptr;
256   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
257     // Get the list of successors of this block.
258     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
259     assert(Succs.size() <= 2 &&
260            "Basic block in vector loop has more than 2 successors.");
261     PostDomSucc = Succs[0];
262     if (Succs.size() == 1) {
263       assert(PostDomSucc->getSinglePredecessor() &&
264              "PostDom successor has more than one predecessor.");
265       DT->addNewBlock(PostDomSucc, BB);
266       continue;
267     }
268     BasicBlock *InterimSucc = Succs[1];
269     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
270       PostDomSucc = Succs[1];
271       InterimSucc = Succs[0];
272     }
273     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
274            "One successor of a basic block does not lead to the other.");
275     assert(InterimSucc->getSinglePredecessor() &&
276            "Interim successor has more than one predecessor.");
277     assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 &&
278            "PostDom successor has more than two predecessors.");
279     DT->addNewBlock(InterimSucc, BB);
280     DT->addNewBlock(PostDomSucc, BB);
281   }
282 }
283 
284 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
285   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
286          Twine(getOrCreateBID(Block));
287 }
288 
289 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
290   const std::string &Name = Block->getName();
291   if (!Name.empty())
292     return Name;
293   return "VPB" + Twine(getOrCreateBID(Block));
294 }
295 
296 void VPlanPrinter::dump() {
297   Depth = 1;
298   bumpIndent(0);
299   OS << "digraph VPlan {\n";
300   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
301   if (!Plan.getName().empty())
302     OS << "\\n" << DOT::EscapeString(Plan.getName());
303   OS << "\"]\n";
304   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
305   OS << "edge [fontname=Courier, fontsize=30]\n";
306   OS << "compound=true\n";
307 
308   for (VPBlockBase *Block : depth_first(Plan.getEntry()))
309     dumpBlock(Block);
310 
311   OS << "}\n";
312 }
313 
314 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
315   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
316     dumpBasicBlock(BasicBlock);
317   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
318     dumpRegion(Region);
319   else
320     llvm_unreachable("Unsupported kind of VPBlock.");
321 }
322 
323 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
324                             bool Hidden, const Twine &Label) {
325   // Due to "dot" we print an edge between two regions as an edge between the
326   // exit basic block and the entry basic of the respective regions.
327   const VPBlockBase *Tail = From->getExitBasicBlock();
328   const VPBlockBase *Head = To->getEntryBasicBlock();
329   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
330   OS << " [ label=\"" << Label << '\"';
331   if (Tail != From)
332     OS << " ltail=" << getUID(From);
333   if (Head != To)
334     OS << " lhead=" << getUID(To);
335   if (Hidden)
336     OS << "; splines=none";
337   OS << "]\n";
338 }
339 
340 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
341   auto &Successors = Block->getSuccessors();
342   if (Successors.size() == 1)
343     drawEdge(Block, Successors.front(), false, "");
344   else if (Successors.size() == 2) {
345     drawEdge(Block, Successors.front(), false, "T");
346     drawEdge(Block, Successors.back(), false, "F");
347   } else {
348     unsigned SuccessorNumber = 0;
349     for (auto *Successor : Successors)
350       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
351   }
352 }
353 
354 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
355   OS << Indent << getUID(BasicBlock) << " [label =\n";
356   bumpIndent(1);
357   OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
358   bumpIndent(1);
359   for (const VPRecipeBase &Recipe : *BasicBlock)
360     Recipe.print(OS, Indent);
361   bumpIndent(-2);
362   OS << "\n" << Indent << "]\n";
363   dumpEdges(BasicBlock);
364 }
365 
366 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
367   OS << Indent << "subgraph " << getUID(Region) << " {\n";
368   bumpIndent(1);
369   OS << Indent << "fontname=Courier\n"
370      << Indent << "label=\""
371      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
372      << DOT::EscapeString(Region->getName()) << "\"\n";
373   // Dump the blocks of the region.
374   assert(Region->getEntry() && "Region contains no inner blocks.");
375   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
376     dumpBlock(Block);
377   bumpIndent(-1);
378   OS << Indent << "}\n";
379   dumpEdges(Region);
380 }
381 
382 void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
383   std::string IngredientString;
384   raw_string_ostream RSO(IngredientString);
385   if (auto *Inst = dyn_cast<Instruction>(V)) {
386     if (!Inst->getType()->isVoidTy()) {
387       Inst->printAsOperand(RSO, false);
388       RSO << " = ";
389     }
390     RSO << Inst->getOpcodeName() << " ";
391     unsigned E = Inst->getNumOperands();
392     if (E > 0) {
393       Inst->getOperand(0)->printAsOperand(RSO, false);
394       for (unsigned I = 1; I < E; ++I)
395         Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
396     }
397   } else // !Inst
398     V->printAsOperand(RSO, false);
399   RSO.flush();
400   O << DOT::EscapeString(IngredientString);
401 }
402