1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This is the LLVM vectorization plan. It represents a candidate for
12 /// vectorization, allowing to plan and optimize how to vectorize a given loop
13 /// before generating LLVM-IR.
14 /// The vectorizer uses vectorization plans to estimate the costs of potential
15 /// candidates and if profitable to execute the desired plan, generating vector
16 /// LLVM-IR code.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #include "VPlan.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/InstrTypes.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/GraphWriter.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
40 #include <cassert>
41 #include <iterator>
42 #include <string>
43 #include <vector>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "vplan"
48 
49 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
50 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
51   const VPBlockBase *Block = this;
52   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
53     Block = Region->getEntry();
54   return cast<VPBasicBlock>(Block);
55 }
56 
57 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
58   VPBlockBase *Block = this;
59   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
60     Block = Region->getEntry();
61   return cast<VPBasicBlock>(Block);
62 }
63 
64 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
65 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
66   const VPBlockBase *Block = this;
67   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
68     Block = Region->getExit();
69   return cast<VPBasicBlock>(Block);
70 }
71 
72 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
73   VPBlockBase *Block = this;
74   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
75     Block = Region->getExit();
76   return cast<VPBasicBlock>(Block);
77 }
78 
79 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
80   if (!Successors.empty() || !Parent)
81     return this;
82   assert(Parent->getExit() == this &&
83          "Block w/o successors not the exit of its parent.");
84   return Parent->getEnclosingBlockWithSuccessors();
85 }
86 
87 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
88   if (!Predecessors.empty() || !Parent)
89     return this;
90   assert(Parent->getEntry() == this &&
91          "Block w/o predecessors not the entry of its parent.");
92   return Parent->getEnclosingBlockWithPredecessors();
93 }
94 
95 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
96   SmallVector<VPBlockBase *, 8> Blocks;
97   for (VPBlockBase *Block : depth_first(Entry))
98     Blocks.push_back(Block);
99 
100   for (VPBlockBase *Block : Blocks)
101     delete Block;
102 }
103 
104 BasicBlock *
105 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
106   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
107   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
108   BasicBlock *PrevBB = CFG.PrevBB;
109   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
110                                          PrevBB->getParent(), CFG.LastBB);
111   DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
112 
113   // Hook up the new basic block to its predecessors.
114   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
115     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
116     auto &PredVPSuccessors = PredVPBB->getSuccessors();
117     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
118     assert(PredBB && "Predecessor basic-block not found building successor.");
119     auto *PredBBTerminator = PredBB->getTerminator();
120     DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
121     if (isa<UnreachableInst>(PredBBTerminator)) {
122       assert(PredVPSuccessors.size() == 1 &&
123              "Predecessor ending w/o branch must have single successor.");
124       PredBBTerminator->eraseFromParent();
125       BranchInst::Create(NewBB, PredBB);
126     } else {
127       assert(PredVPSuccessors.size() == 2 &&
128              "Predecessor ending with branch must have two successors.");
129       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
130       assert(!PredBBTerminator->getSuccessor(idx) &&
131              "Trying to reset an existing successor block.");
132       PredBBTerminator->setSuccessor(idx, NewBB);
133     }
134   }
135   return NewBB;
136 }
137 
138 void VPBasicBlock::execute(VPTransformState *State) {
139   bool Replica = State->Instance &&
140                  !(State->Instance->Part == 0 && State->Instance->Lane == 0);
141   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
142   VPBlockBase *SingleHPred = nullptr;
143   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
144 
145   // 1. Create an IR basic block, or reuse the last one if possible.
146   // The last IR basic block is reused, as an optimization, in three cases:
147   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
148   // B. when the current VPBB has a single (hierarchical) predecessor which
149   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
150   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
151   //    is the exit of this region from a previous instance, or the predecessor
152   //    of this region.
153   if (PrevVPBB && /* A */
154       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
155         SingleHPred->getExitBasicBlock() == PrevVPBB &&
156         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
157       !(Replica && getPredecessors().empty())) {       /* C */
158     NewBB = createEmptyBasicBlock(State->CFG);
159     State->Builder.SetInsertPoint(NewBB);
160     // Temporarily terminate with unreachable until CFG is rewired.
161     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
162     State->Builder.SetInsertPoint(Terminator);
163     // Register NewBB in its loop. In innermost loops its the same for all BB's.
164     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
165     L->addBasicBlockToLoop(NewBB, *State->LI);
166     State->CFG.PrevBB = NewBB;
167   }
168 
169   // 2. Fill the IR basic block with IR instructions.
170   DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
171                << " in BB:" << NewBB->getName() << '\n');
172 
173   State->CFG.VPBB2IRBB[this] = NewBB;
174   State->CFG.PrevVPBB = this;
175 
176   for (VPRecipeBase &Recipe : Recipes)
177     Recipe.execute(*State);
178 
179   DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
180 }
181 
182 void VPRegionBlock::execute(VPTransformState *State) {
183   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
184 
185   if (!isReplicator()) {
186     // Visit the VPBlocks connected to "this", starting from it.
187     for (VPBlockBase *Block : RPOT) {
188       DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
189       Block->execute(State);
190     }
191     return;
192   }
193 
194   assert(!State->Instance && "Replicating a Region with non-null instance.");
195 
196   // Enter replicating mode.
197   State->Instance = {0, 0};
198 
199   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
200     State->Instance->Part = Part;
201     for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
202       State->Instance->Lane = Lane;
203       // Visit the VPBlocks connected to \p this, starting from it.
204       for (VPBlockBase *Block : RPOT) {
205         DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
206         Block->execute(State);
207       }
208     }
209   }
210 
211   // Exit replicating mode.
212   State->Instance.reset();
213 }
214 
215 /// Generate the code inside the body of the vectorized loop. Assumes a single
216 /// LoopVectorBody basic-block was created for this. Introduce additional
217 /// basic-blocks as needed, and fill them all.
218 void VPlan::execute(VPTransformState *State) {
219   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
220   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
221   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
222   BasicBlock *VectorLatchBB = VectorHeaderBB;
223 
224   // 1. Make room to generate basic-blocks inside loop body if needed.
225   VectorLatchBB = VectorHeaderBB->splitBasicBlock(
226       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
227   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
228   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
229   // Remove the edge between Header and Latch to allow other connections.
230   // Temporarily terminate with unreachable until CFG is rewired.
231   // Note: this asserts the generated code's assumption that
232   // getFirstInsertionPt() can be dereferenced into an Instruction.
233   VectorHeaderBB->getTerminator()->eraseFromParent();
234   State->Builder.SetInsertPoint(VectorHeaderBB);
235   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
236   State->Builder.SetInsertPoint(Terminator);
237 
238   // 2. Generate code in loop body.
239   State->CFG.PrevVPBB = nullptr;
240   State->CFG.PrevBB = VectorHeaderBB;
241   State->CFG.LastBB = VectorLatchBB;
242 
243   for (VPBlockBase *Block : depth_first(Entry))
244     Block->execute(State);
245 
246   // 3. Merge the temporary latch created with the last basic-block filled.
247   BasicBlock *LastBB = State->CFG.PrevBB;
248   // Connect LastBB to VectorLatchBB to facilitate their merge.
249   assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
250          "Expected VPlan CFG to terminate with unreachable");
251   LastBB->getTerminator()->eraseFromParent();
252   BranchInst::Create(VectorLatchBB, LastBB);
253 
254   // Merge LastBB with Latch.
255   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
256   (void)Merged;
257   assert(Merged && "Could not merge last basic block with latch.");
258   VectorLatchBB = LastBB;
259 
260   updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
261 }
262 
263 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
264                                 BasicBlock *LoopLatchBB) {
265   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
266   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
267   DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
268   // The vector body may be more than a single basic-block by this point.
269   // Update the dominator tree information inside the vector body by propagating
270   // it from header to latch, expecting only triangular control-flow, if any.
271   BasicBlock *PostDomSucc = nullptr;
272   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
273     // Get the list of successors of this block.
274     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
275     assert(Succs.size() <= 2 &&
276            "Basic block in vector loop has more than 2 successors.");
277     PostDomSucc = Succs[0];
278     if (Succs.size() == 1) {
279       assert(PostDomSucc->getSinglePredecessor() &&
280              "PostDom successor has more than one predecessor.");
281       DT->addNewBlock(PostDomSucc, BB);
282       continue;
283     }
284     BasicBlock *InterimSucc = Succs[1];
285     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
286       PostDomSucc = Succs[1];
287       InterimSucc = Succs[0];
288     }
289     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
290            "One successor of a basic block does not lead to the other.");
291     assert(InterimSucc->getSinglePredecessor() &&
292            "Interim successor has more than one predecessor.");
293     assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 &&
294            "PostDom successor has more than two predecessors.");
295     DT->addNewBlock(InterimSucc, BB);
296     DT->addNewBlock(PostDomSucc, BB);
297   }
298 }
299 
300 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
301   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
302          Twine(getOrCreateBID(Block));
303 }
304 
305 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
306   const std::string &Name = Block->getName();
307   if (!Name.empty())
308     return Name;
309   return "VPB" + Twine(getOrCreateBID(Block));
310 }
311 
312 void VPlanPrinter::dump() {
313   Depth = 1;
314   bumpIndent(0);
315   OS << "digraph VPlan {\n";
316   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
317   if (!Plan.getName().empty())
318     OS << "\\n" << DOT::EscapeString(Plan.getName());
319   OS << "\"]\n";
320   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
321   OS << "edge [fontname=Courier, fontsize=30]\n";
322   OS << "compound=true\n";
323 
324   for (VPBlockBase *Block : depth_first(Plan.getEntry()))
325     dumpBlock(Block);
326 
327   OS << "}\n";
328 }
329 
330 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
331   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
332     dumpBasicBlock(BasicBlock);
333   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
334     dumpRegion(Region);
335   else
336     llvm_unreachable("Unsupported kind of VPBlock.");
337 }
338 
339 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
340                             bool Hidden, const Twine &Label) {
341   // Due to "dot" we print an edge between two regions as an edge between the
342   // exit basic block and the entry basic of the respective regions.
343   const VPBlockBase *Tail = From->getExitBasicBlock();
344   const VPBlockBase *Head = To->getEntryBasicBlock();
345   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
346   OS << " [ label=\"" << Label << '\"';
347   if (Tail != From)
348     OS << " ltail=" << getUID(From);
349   if (Head != To)
350     OS << " lhead=" << getUID(To);
351   if (Hidden)
352     OS << "; splines=none";
353   OS << "]\n";
354 }
355 
356 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
357   auto &Successors = Block->getSuccessors();
358   if (Successors.size() == 1)
359     drawEdge(Block, Successors.front(), false, "");
360   else if (Successors.size() == 2) {
361     drawEdge(Block, Successors.front(), false, "T");
362     drawEdge(Block, Successors.back(), false, "F");
363   } else {
364     unsigned SuccessorNumber = 0;
365     for (auto *Successor : Successors)
366       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
367   }
368 }
369 
370 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
371   OS << Indent << getUID(BasicBlock) << " [label =\n";
372   bumpIndent(1);
373   OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
374   bumpIndent(1);
375   for (const VPRecipeBase &Recipe : *BasicBlock)
376     Recipe.print(OS, Indent);
377   bumpIndent(-2);
378   OS << "\n" << Indent << "]\n";
379   dumpEdges(BasicBlock);
380 }
381 
382 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
383   OS << Indent << "subgraph " << getUID(Region) << " {\n";
384   bumpIndent(1);
385   OS << Indent << "fontname=Courier\n"
386      << Indent << "label=\""
387      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
388      << DOT::EscapeString(Region->getName()) << "\"\n";
389   // Dump the blocks of the region.
390   assert(Region->getEntry() && "Region contains no inner blocks.");
391   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
392     dumpBlock(Block);
393   bumpIndent(-1);
394   OS << Indent << "}\n";
395   dumpEdges(Region);
396 }
397 
398 void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
399   std::string IngredientString;
400   raw_string_ostream RSO(IngredientString);
401   if (auto *Inst = dyn_cast<Instruction>(V)) {
402     if (!Inst->getType()->isVoidTy()) {
403       Inst->printAsOperand(RSO, false);
404       RSO << " = ";
405     }
406     RSO << Inst->getOpcodeName() << " ";
407     unsigned E = Inst->getNumOperands();
408     if (E > 0) {
409       Inst->getOperand(0)->printAsOperand(RSO, false);
410       for (unsigned I = 1; I < E; ++I)
411         Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
412     }
413   } else // !Inst
414     V->printAsOperand(RSO, false);
415   RSO.flush();
416   O << DOT::EscapeString(IngredientString);
417 }
418