1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GenericDomTreeConstruction.h"
38 #include "llvm/Support/GraphWriter.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
41 #include "llvm/Transforms/Utils/LoopVersioning.h"
42 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
43 #include <cassert>
44 #include <string>
45 #include <vector>
46
47 using namespace llvm;
48 extern cl::opt<bool> EnableVPlanNativePath;
49
50 #define DEBUG_TYPE "vplan"
51
52 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
operator <<(raw_ostream & OS,const VPValue & V)53 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
54 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
55 VPSlotTracker SlotTracker(
56 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
57 V.print(OS, SlotTracker);
58 return OS;
59 }
60 #endif
61
getAsRuntimeExpr(IRBuilderBase & Builder,const ElementCount & VF) const62 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
63 const ElementCount &VF) const {
64 switch (LaneKind) {
65 case VPLane::Kind::ScalableLast:
66 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
67 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
68 Builder.getInt32(VF.getKnownMinValue() - Lane));
69 case VPLane::Kind::First:
70 return Builder.getInt32(Lane);
71 }
72 llvm_unreachable("Unknown lane kind");
73 }
74
VPValue(const unsigned char SC,Value * UV,VPDef * Def)75 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
76 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
77 if (Def)
78 Def->addDefinedValue(this);
79 }
80
~VPValue()81 VPValue::~VPValue() {
82 assert(Users.empty() && "trying to delete a VPValue with remaining users");
83 if (Def)
84 Def->removeDefinedValue(this);
85 }
86
87 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS,VPSlotTracker & SlotTracker) const88 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
89 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
90 R->print(OS, "", SlotTracker);
91 else
92 printAsOperand(OS, SlotTracker);
93 }
94
dump() const95 void VPValue::dump() const {
96 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
97 VPSlotTracker SlotTracker(
98 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
99 print(dbgs(), SlotTracker);
100 dbgs() << "\n";
101 }
102
dump() const103 void VPDef::dump() const {
104 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
105 VPSlotTracker SlotTracker(
106 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
107 print(dbgs(), "", SlotTracker);
108 dbgs() << "\n";
109 }
110 #endif
111
112 // Get the top-most entry block of \p Start. This is the entry block of the
113 // containing VPlan. This function is templated to support both const and non-const blocks
getPlanEntry(T * Start)114 template <typename T> static T *getPlanEntry(T *Start) {
115 T *Next = Start;
116 T *Current = Start;
117 while ((Next = Next->getParent()))
118 Current = Next;
119
120 SmallSetVector<T *, 8> WorkList;
121 WorkList.insert(Current);
122
123 for (unsigned i = 0; i < WorkList.size(); i++) {
124 T *Current = WorkList[i];
125 if (Current->getNumPredecessors() == 0)
126 return Current;
127 auto &Predecessors = Current->getPredecessors();
128 WorkList.insert(Predecessors.begin(), Predecessors.end());
129 }
130
131 llvm_unreachable("VPlan without any entry node without predecessors");
132 }
133
getPlan()134 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
135
getPlan() const136 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
137
138 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
getEntryBasicBlock() const139 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
140 const VPBlockBase *Block = this;
141 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
142 Block = Region->getEntry();
143 return cast<VPBasicBlock>(Block);
144 }
145
getEntryBasicBlock()146 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
147 VPBlockBase *Block = this;
148 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
149 Block = Region->getEntry();
150 return cast<VPBasicBlock>(Block);
151 }
152
setPlan(VPlan * ParentPlan)153 void VPBlockBase::setPlan(VPlan *ParentPlan) {
154 assert(ParentPlan->getEntry() == this &&
155 "Can only set plan on its entry block.");
156 Plan = ParentPlan;
157 }
158
159 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
getExitingBasicBlock() const160 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
161 const VPBlockBase *Block = this;
162 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
163 Block = Region->getExiting();
164 return cast<VPBasicBlock>(Block);
165 }
166
getExitingBasicBlock()167 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
168 VPBlockBase *Block = this;
169 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
170 Block = Region->getExiting();
171 return cast<VPBasicBlock>(Block);
172 }
173
getEnclosingBlockWithSuccessors()174 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
175 if (!Successors.empty() || !Parent)
176 return this;
177 assert(Parent->getExiting() == this &&
178 "Block w/o successors not the exiting block of its parent.");
179 return Parent->getEnclosingBlockWithSuccessors();
180 }
181
getEnclosingBlockWithPredecessors()182 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
183 if (!Predecessors.empty() || !Parent)
184 return this;
185 assert(Parent->getEntry() == this &&
186 "Block w/o predecessors not the entry of its parent.");
187 return Parent->getEnclosingBlockWithPredecessors();
188 }
189
deleteCFG(VPBlockBase * Entry)190 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
191 SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
192
193 for (VPBlockBase *Block : Blocks)
194 delete Block;
195 }
196
getFirstNonPhi()197 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
198 iterator It = begin();
199 while (It != end() && It->isPhi())
200 It++;
201 return It;
202 }
203
get(VPValue * Def,const VPIteration & Instance)204 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
205 if (!Def->getDef())
206 return Def->getLiveInIRValue();
207
208 if (hasScalarValue(Def, Instance)) {
209 return Data
210 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
211 }
212
213 assert(hasVectorValue(Def, Instance.Part));
214 auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
215 if (!VecPart->getType()->isVectorTy()) {
216 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
217 return VecPart;
218 }
219 // TODO: Cache created scalar values.
220 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
221 auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
222 // set(Def, Extract, Instance);
223 return Extract;
224 }
getPreheaderBBFor(VPRecipeBase * R)225 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
226 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
227 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
228 }
229
addNewMetadata(Instruction * To,const Instruction * Orig)230 void VPTransformState::addNewMetadata(Instruction *To,
231 const Instruction *Orig) {
232 // If the loop was versioned with memchecks, add the corresponding no-alias
233 // metadata.
234 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
235 LVer->annotateInstWithNoAlias(To, Orig);
236 }
237
addMetadata(Instruction * To,Instruction * From)238 void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
239 propagateMetadata(To, From);
240 addNewMetadata(To, From);
241 }
242
addMetadata(ArrayRef<Value * > To,Instruction * From)243 void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
244 for (Value *V : To) {
245 if (Instruction *I = dyn_cast<Instruction>(V))
246 addMetadata(I, From);
247 }
248 }
249
setDebugLocFromInst(const Value * V)250 void VPTransformState::setDebugLocFromInst(const Value *V) {
251 const Instruction *Inst = dyn_cast<Instruction>(V);
252 if (!Inst) {
253 Builder.SetCurrentDebugLocation(DebugLoc());
254 return;
255 }
256
257 const DILocation *DIL = Inst->getDebugLoc();
258 // When a FSDiscriminator is enabled, we don't need to add the multiply
259 // factors to the discriminators.
260 if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
261 !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
262 // FIXME: For scalable vectors, assume vscale=1.
263 auto NewDIL =
264 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
265 if (NewDIL)
266 Builder.SetCurrentDebugLocation(*NewDIL);
267 else
268 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
269 << DIL->getFilename() << " Line: " << DIL->getLine());
270 } else
271 Builder.SetCurrentDebugLocation(DIL);
272 }
273
274 BasicBlock *
createEmptyBasicBlock(VPTransformState::CFGState & CFG)275 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
276 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
277 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
278 BasicBlock *PrevBB = CFG.PrevBB;
279 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
280 PrevBB->getParent(), CFG.ExitBB);
281 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
282
283 // Hook up the new basic block to its predecessors.
284 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
285 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
286 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
287 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
288
289 assert(PredBB && "Predecessor basic-block not found building successor.");
290 auto *PredBBTerminator = PredBB->getTerminator();
291 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
292
293 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
294 if (isa<UnreachableInst>(PredBBTerminator)) {
295 assert(PredVPSuccessors.size() == 1 &&
296 "Predecessor ending w/o branch must have single successor.");
297 DebugLoc DL = PredBBTerminator->getDebugLoc();
298 PredBBTerminator->eraseFromParent();
299 auto *Br = BranchInst::Create(NewBB, PredBB);
300 Br->setDebugLoc(DL);
301 } else if (TermBr && !TermBr->isConditional()) {
302 TermBr->setSuccessor(0, NewBB);
303 } else {
304 // Set each forward successor here when it is created, excluding
305 // backedges. A backward successor is set when the branch is created.
306 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
307 assert(!TermBr->getSuccessor(idx) &&
308 "Trying to reset an existing successor block.");
309 TermBr->setSuccessor(idx, NewBB);
310 }
311 }
312 return NewBB;
313 }
314
execute(VPTransformState * State)315 void VPBasicBlock::execute(VPTransformState *State) {
316 bool Replica = State->Instance && !State->Instance->isFirstIteration();
317 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
318 VPBlockBase *SingleHPred = nullptr;
319 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
320
321 auto IsLoopRegion = [](VPBlockBase *BB) {
322 auto *R = dyn_cast<VPRegionBlock>(BB);
323 return R && !R->isReplicator();
324 };
325
326 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
327 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
328 // ExitBB can be re-used for the exit block of the Plan.
329 NewBB = State->CFG.ExitBB;
330 State->CFG.PrevBB = NewBB;
331
332 // Update the branch instruction in the predecessor to branch to ExitBB.
333 VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
334 VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
335 assert(PredVPB->getSingleSuccessor() == this &&
336 "predecessor must have the current block as only successor");
337 BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
338 // The Exit block of a loop is always set to be successor 0 of the Exiting
339 // block.
340 cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
341 } else if (PrevVPBB && /* A */
342 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
343 SingleHPred->getExitingBasicBlock() == PrevVPBB &&
344 PrevVPBB->getSingleHierarchicalSuccessor() &&
345 (SingleHPred->getParent() == getEnclosingLoopRegion() &&
346 !IsLoopRegion(SingleHPred))) && /* B */
347 !(Replica && getPredecessors().empty())) { /* C */
348 // The last IR basic block is reused, as an optimization, in three cases:
349 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
350 // B. when the current VPBB has a single (hierarchical) predecessor which
351 // is PrevVPBB and the latter has a single (hierarchical) successor which
352 // both are in the same non-replicator region; and
353 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
354 // is the exiting VPBB of this region from a previous instance, or the
355 // predecessor of this region.
356
357 NewBB = createEmptyBasicBlock(State->CFG);
358 State->Builder.SetInsertPoint(NewBB);
359 // Temporarily terminate with unreachable until CFG is rewired.
360 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
361 // Register NewBB in its loop. In innermost loops its the same for all
362 // BB's.
363 if (State->CurrentVectorLoop)
364 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
365 State->Builder.SetInsertPoint(Terminator);
366 State->CFG.PrevBB = NewBB;
367 }
368
369 // 2. Fill the IR basic block with IR instructions.
370 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
371 << " in BB:" << NewBB->getName() << '\n');
372
373 State->CFG.VPBB2IRBB[this] = NewBB;
374 State->CFG.PrevVPBB = this;
375
376 for (VPRecipeBase &Recipe : Recipes)
377 Recipe.execute(*State);
378
379 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
380 }
381
dropAllReferences(VPValue * NewValue)382 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
383 for (VPRecipeBase &R : Recipes) {
384 for (auto *Def : R.definedValues())
385 Def->replaceAllUsesWith(NewValue);
386
387 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
388 R.setOperand(I, NewValue);
389 }
390 }
391
splitAt(iterator SplitAt)392 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
393 assert((SplitAt == end() || SplitAt->getParent() == this) &&
394 "can only split at a position in the same block");
395
396 SmallVector<VPBlockBase *, 2> Succs(successors());
397 // First, disconnect the current block from its successors.
398 for (VPBlockBase *Succ : Succs)
399 VPBlockUtils::disconnectBlocks(this, Succ);
400
401 // Create new empty block after the block to split.
402 auto *SplitBlock = new VPBasicBlock(getName() + ".split");
403 VPBlockUtils::insertBlockAfter(SplitBlock, this);
404
405 // Add successors for block to split to new block.
406 for (VPBlockBase *Succ : Succs)
407 VPBlockUtils::connectBlocks(SplitBlock, Succ);
408
409 // Finally, move the recipes starting at SplitAt to new block.
410 for (VPRecipeBase &ToMove :
411 make_early_inc_range(make_range(SplitAt, this->end())))
412 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
413
414 return SplitBlock;
415 }
416
getEnclosingLoopRegion()417 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
418 VPRegionBlock *P = getParent();
419 if (P && P->isReplicator()) {
420 P = P->getParent();
421 assert(!cast<VPRegionBlock>(P)->isReplicator() &&
422 "unexpected nested replicate regions");
423 }
424 return P;
425 }
426
hasConditionalTerminator(const VPBasicBlock * VPBB)427 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
428 if (VPBB->empty()) {
429 assert(
430 VPBB->getNumSuccessors() < 2 &&
431 "block with multiple successors doesn't have a recipe as terminator");
432 return false;
433 }
434
435 const VPRecipeBase *R = &VPBB->back();
436 auto *VPI = dyn_cast<VPInstruction>(R);
437 bool IsCondBranch =
438 isa<VPBranchOnMaskRecipe>(R) ||
439 (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
440 VPI->getOpcode() == VPInstruction::BranchOnCount));
441 (void)IsCondBranch;
442
443 if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
444 assert(IsCondBranch && "block with multiple successors not terminated by "
445 "conditional branch recipe");
446
447 return true;
448 }
449
450 assert(
451 !IsCondBranch &&
452 "block with 0 or 1 successors terminated by conditional branch recipe");
453 return false;
454 }
455
getTerminator()456 VPRecipeBase *VPBasicBlock::getTerminator() {
457 if (hasConditionalTerminator(this))
458 return &back();
459 return nullptr;
460 }
461
getTerminator() const462 const VPRecipeBase *VPBasicBlock::getTerminator() const {
463 if (hasConditionalTerminator(this))
464 return &back();
465 return nullptr;
466 }
467
isExiting() const468 bool VPBasicBlock::isExiting() const {
469 return getParent()->getExitingBasicBlock() == this;
470 }
471
472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
printSuccessors(raw_ostream & O,const Twine & Indent) const473 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
474 if (getSuccessors().empty()) {
475 O << Indent << "No successors\n";
476 } else {
477 O << Indent << "Successor(s): ";
478 ListSeparator LS;
479 for (auto *Succ : getSuccessors())
480 O << LS << Succ->getName();
481 O << '\n';
482 }
483 }
484
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const485 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
486 VPSlotTracker &SlotTracker) const {
487 O << Indent << getName() << ":\n";
488
489 auto RecipeIndent = Indent + " ";
490 for (const VPRecipeBase &Recipe : *this) {
491 Recipe.print(O, RecipeIndent, SlotTracker);
492 O << '\n';
493 }
494
495 printSuccessors(O, Indent);
496 }
497 #endif
498
dropAllReferences(VPValue * NewValue)499 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
500 for (VPBlockBase *Block : depth_first(Entry))
501 // Drop all references in VPBasicBlocks and replace all uses with
502 // DummyValue.
503 Block->dropAllReferences(NewValue);
504 }
505
execute(VPTransformState * State)506 void VPRegionBlock::execute(VPTransformState *State) {
507 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
508
509 if (!isReplicator()) {
510 // Create and register the new vector loop.
511 Loop *PrevLoop = State->CurrentVectorLoop;
512 State->CurrentVectorLoop = State->LI->AllocateLoop();
513 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
514 Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
515
516 // Insert the new loop into the loop nest and register the new basic blocks
517 // before calling any utilities such as SCEV that require valid LoopInfo.
518 if (ParentLoop)
519 ParentLoop->addChildLoop(State->CurrentVectorLoop);
520 else
521 State->LI->addTopLevelLoop(State->CurrentVectorLoop);
522
523 // Visit the VPBlocks connected to "this", starting from it.
524 for (VPBlockBase *Block : RPOT) {
525 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
526 Block->execute(State);
527 }
528
529 State->CurrentVectorLoop = PrevLoop;
530 return;
531 }
532
533 assert(!State->Instance && "Replicating a Region with non-null instance.");
534
535 // Enter replicating mode.
536 State->Instance = VPIteration(0, 0);
537
538 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
539 State->Instance->Part = Part;
540 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
541 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
542 ++Lane) {
543 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
544 // Visit the VPBlocks connected to \p this, starting from it.
545 for (VPBlockBase *Block : RPOT) {
546 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
547 Block->execute(State);
548 }
549 }
550 }
551
552 // Exit replicating mode.
553 State->Instance.reset();
554 }
555
556 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const557 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
558 VPSlotTracker &SlotTracker) const {
559 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
560 auto NewIndent = Indent + " ";
561 for (auto *BlockBase : depth_first(Entry)) {
562 O << '\n';
563 BlockBase->print(O, NewIndent, SlotTracker);
564 }
565 O << Indent << "}\n";
566
567 printSuccessors(O, Indent);
568 }
569 #endif
570
getActiveLaneMaskPhi()571 VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
572 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
573 for (VPRecipeBase &R : Header->phis()) {
574 if (isa<VPActiveLaneMaskPHIRecipe>(&R))
575 return cast<VPActiveLaneMaskPHIRecipe>(&R);
576 }
577 return nullptr;
578 }
579
canSimplifyBranchOnCond(VPInstruction * Term)580 static bool canSimplifyBranchOnCond(VPInstruction *Term) {
581 VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
582 if (!Not || Not->getOpcode() != VPInstruction::Not)
583 return false;
584
585 VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
586 return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
587 }
588
prepareToExecute(Value * TripCountV,Value * VectorTripCountV,Value * CanonicalIVStartValue,VPTransformState & State,bool IsEpilogueVectorization)589 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
590 Value *CanonicalIVStartValue,
591 VPTransformState &State,
592 bool IsEpilogueVectorization) {
593
594 VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
595 auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
596 // Try to simplify the branch condition if TC <= VF * UF when preparing to
597 // execute the plan for the main vector loop. We only do this if the
598 // terminator is:
599 // 1. BranchOnCount, or
600 // 2. BranchOnCond where the input is Not(ActiveLaneMask).
601 if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) &&
602 (Term->getOpcode() == VPInstruction::BranchOnCount ||
603 (Term->getOpcode() == VPInstruction::BranchOnCond &&
604 canSimplifyBranchOnCond(Term)))) {
605 ConstantInt *C = cast<ConstantInt>(TripCountV);
606 uint64_t TCVal = C->getZExtValue();
607 if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
608 auto *BOC =
609 new VPInstruction(VPInstruction::BranchOnCond,
610 {getOrAddExternalDef(State.Builder.getTrue())});
611 Term->eraseFromParent();
612 ExitingVPBB->appendRecipe(BOC);
613 // TODO: Further simplifications are possible
614 // 1. Replace inductions with constants.
615 // 2. Replace vector loop region with VPBasicBlock.
616 }
617 }
618
619 // Check if the trip count is needed, and if so build it.
620 if (TripCount && TripCount->getNumUsers()) {
621 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
622 State.set(TripCount, TripCountV, Part);
623 }
624
625 // Check if the backedge taken count is needed, and if so build it.
626 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
627 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
628 auto *TCMO = Builder.CreateSub(TripCountV,
629 ConstantInt::get(TripCountV->getType(), 1),
630 "trip.count.minus.1");
631 auto VF = State.VF;
632 Value *VTCMO =
633 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
634 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
635 State.set(BackedgeTakenCount, VTCMO, Part);
636 }
637
638 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
639 State.set(&VectorTripCount, VectorTripCountV, Part);
640
641 // When vectorizing the epilogue loop, the canonical induction start value
642 // needs to be changed from zero to the value after the main vector loop.
643 if (CanonicalIVStartValue) {
644 VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
645 auto *IV = getCanonicalIV();
646 assert(all_of(IV->users(),
647 [](const VPUser *U) {
648 if (isa<VPScalarIVStepsRecipe>(U))
649 return true;
650 auto *VPI = cast<VPInstruction>(U);
651 return VPI->getOpcode() ==
652 VPInstruction::CanonicalIVIncrement ||
653 VPI->getOpcode() ==
654 VPInstruction::CanonicalIVIncrementNUW;
655 }) &&
656 "the canonical IV should only be used by its increments or "
657 "ScalarIVSteps when "
658 "resetting the start value");
659 IV->setOperand(0, VPV);
660 }
661 }
662
663 /// Generate the code inside the preheader and body of the vectorized loop.
664 /// Assumes a single pre-header basic-block was created for this. Introduce
665 /// additional basic-blocks as needed, and fill them all.
execute(VPTransformState * State)666 void VPlan::execute(VPTransformState *State) {
667 // Set the reverse mapping from VPValues to Values for code generation.
668 for (auto &Entry : Value2VPValue)
669 State->VPValue2Value[Entry.second] = Entry.first;
670
671 // Initialize CFG state.
672 State->CFG.PrevVPBB = nullptr;
673 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
674 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
675 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
676
677 // Generate code in the loop pre-header and body.
678 for (VPBlockBase *Block : depth_first(Entry))
679 Block->execute(State);
680
681 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
682 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
683
684 // Fix the latch value of canonical, reduction and first-order recurrences
685 // phis in the vector loop.
686 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
687 for (VPRecipeBase &R : Header->phis()) {
688 // Skip phi-like recipes that generate their backedege values themselves.
689 if (isa<VPWidenPHIRecipe>(&R))
690 continue;
691
692 if (isa<VPWidenPointerInductionRecipe>(&R) ||
693 isa<VPWidenIntOrFpInductionRecipe>(&R)) {
694 PHINode *Phi = nullptr;
695 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
696 Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
697 } else {
698 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
699 // TODO: Split off the case that all users of a pointer phi are scalar
700 // from the VPWidenPointerInductionRecipe.
701 if (WidenPhi->onlyScalarsGenerated(State->VF))
702 continue;
703
704 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
705 Phi = cast<PHINode>(GEP->getPointerOperand());
706 }
707
708 Phi->setIncomingBlock(1, VectorLatchBB);
709
710 // Move the last step to the end of the latch block. This ensures
711 // consistent placement of all induction updates.
712 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
713 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
714 continue;
715 }
716
717 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
718 // For canonical IV, first-order recurrences and in-order reduction phis,
719 // only a single part is generated, which provides the last part from the
720 // previous iteration. For non-ordered reductions all UF parts are
721 // generated.
722 bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
723 isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
724 (isa<VPReductionPHIRecipe>(PhiR) &&
725 cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
726 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
727
728 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
729 Value *Phi = State->get(PhiR, Part);
730 Value *Val = State->get(PhiR->getBackedgeValue(),
731 SinglePartNeeded ? State->UF - 1 : Part);
732 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
733 }
734 }
735
736 // We do not attempt to preserve DT for outer loop vectorization currently.
737 if (!EnableVPlanNativePath) {
738 BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
739 State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
740 updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
741 State->CFG.ExitBB);
742 }
743 }
744
745 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
746 LLVM_DUMP_METHOD
print(raw_ostream & O) const747 void VPlan::print(raw_ostream &O) const {
748 VPSlotTracker SlotTracker(this);
749
750 O << "VPlan '" << Name << "' {";
751
752 if (VectorTripCount.getNumUsers() > 0) {
753 O << "\nLive-in ";
754 VectorTripCount.printAsOperand(O, SlotTracker);
755 O << " = vector-trip-count\n";
756 }
757
758 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
759 O << "\nLive-in ";
760 BackedgeTakenCount->printAsOperand(O, SlotTracker);
761 O << " = backedge-taken count\n";
762 }
763
764 for (const VPBlockBase *Block : depth_first(getEntry())) {
765 O << '\n';
766 Block->print(O, "", SlotTracker);
767 }
768
769 if (!LiveOuts.empty())
770 O << "\n";
771 for (auto &KV : LiveOuts) {
772 O << "Live-out ";
773 KV.second->getPhi()->printAsOperand(O);
774 O << " = ";
775 KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
776 O << "\n";
777 }
778
779 O << "}\n";
780 }
781
782 LLVM_DUMP_METHOD
printDOT(raw_ostream & O) const783 void VPlan::printDOT(raw_ostream &O) const {
784 VPlanPrinter Printer(O, *this);
785 Printer.dump();
786 }
787
788 LLVM_DUMP_METHOD
dump() const789 void VPlan::dump() const { print(dbgs()); }
790 #endif
791
addLiveOut(PHINode * PN,VPValue * V)792 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
793 assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
794 LiveOuts.insert({PN, new VPLiveOut(PN, V)});
795 }
796
updateDominatorTree(DominatorTree * DT,BasicBlock * LoopHeaderBB,BasicBlock * LoopLatchBB,BasicBlock * LoopExitBB)797 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
798 BasicBlock *LoopLatchBB,
799 BasicBlock *LoopExitBB) {
800 // The vector body may be more than a single basic-block by this point.
801 // Update the dominator tree information inside the vector body by propagating
802 // it from header to latch, expecting only triangular control-flow, if any.
803 BasicBlock *PostDomSucc = nullptr;
804 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
805 // Get the list of successors of this block.
806 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
807 assert(Succs.size() <= 2 &&
808 "Basic block in vector loop has more than 2 successors.");
809 PostDomSucc = Succs[0];
810 if (Succs.size() == 1) {
811 assert(PostDomSucc->getSinglePredecessor() &&
812 "PostDom successor has more than one predecessor.");
813 DT->addNewBlock(PostDomSucc, BB);
814 continue;
815 }
816 BasicBlock *InterimSucc = Succs[1];
817 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
818 PostDomSucc = Succs[1];
819 InterimSucc = Succs[0];
820 }
821 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
822 "One successor of a basic block does not lead to the other.");
823 assert(InterimSucc->getSinglePredecessor() &&
824 "Interim successor has more than one predecessor.");
825 assert(PostDomSucc->hasNPredecessors(2) &&
826 "PostDom successor has more than two predecessors.");
827 DT->addNewBlock(InterimSucc, BB);
828 DT->addNewBlock(PostDomSucc, BB);
829 }
830 // Latch block is a new dominator for the loop exit.
831 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
832 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
833 }
834
835 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
836
getUID(const VPBlockBase * Block)837 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
838 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
839 Twine(getOrCreateBID(Block));
840 }
841
getOrCreateName(const VPBlockBase * Block)842 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
843 const std::string &Name = Block->getName();
844 if (!Name.empty())
845 return Name;
846 return "VPB" + Twine(getOrCreateBID(Block));
847 }
848
dump()849 void VPlanPrinter::dump() {
850 Depth = 1;
851 bumpIndent(0);
852 OS << "digraph VPlan {\n";
853 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
854 if (!Plan.getName().empty())
855 OS << "\\n" << DOT::EscapeString(Plan.getName());
856 if (Plan.BackedgeTakenCount) {
857 OS << ", where:\\n";
858 Plan.BackedgeTakenCount->print(OS, SlotTracker);
859 OS << " := BackedgeTakenCount";
860 }
861 OS << "\"]\n";
862 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
863 OS << "edge [fontname=Courier, fontsize=30]\n";
864 OS << "compound=true\n";
865
866 for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
867 dumpBlock(Block);
868
869 OS << "}\n";
870 }
871
dumpBlock(const VPBlockBase * Block)872 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
873 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
874 dumpBasicBlock(BasicBlock);
875 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
876 dumpRegion(Region);
877 else
878 llvm_unreachable("Unsupported kind of VPBlock.");
879 }
880
drawEdge(const VPBlockBase * From,const VPBlockBase * To,bool Hidden,const Twine & Label)881 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
882 bool Hidden, const Twine &Label) {
883 // Due to "dot" we print an edge between two regions as an edge between the
884 // exiting basic block and the entry basic of the respective regions.
885 const VPBlockBase *Tail = From->getExitingBasicBlock();
886 const VPBlockBase *Head = To->getEntryBasicBlock();
887 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
888 OS << " [ label=\"" << Label << '\"';
889 if (Tail != From)
890 OS << " ltail=" << getUID(From);
891 if (Head != To)
892 OS << " lhead=" << getUID(To);
893 if (Hidden)
894 OS << "; splines=none";
895 OS << "]\n";
896 }
897
dumpEdges(const VPBlockBase * Block)898 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
899 auto &Successors = Block->getSuccessors();
900 if (Successors.size() == 1)
901 drawEdge(Block, Successors.front(), false, "");
902 else if (Successors.size() == 2) {
903 drawEdge(Block, Successors.front(), false, "T");
904 drawEdge(Block, Successors.back(), false, "F");
905 } else {
906 unsigned SuccessorNumber = 0;
907 for (auto *Successor : Successors)
908 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
909 }
910 }
911
dumpBasicBlock(const VPBasicBlock * BasicBlock)912 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
913 // Implement dot-formatted dump by performing plain-text dump into the
914 // temporary storage followed by some post-processing.
915 OS << Indent << getUID(BasicBlock) << " [label =\n";
916 bumpIndent(1);
917 std::string Str;
918 raw_string_ostream SS(Str);
919 // Use no indentation as we need to wrap the lines into quotes ourselves.
920 BasicBlock->print(SS, "", SlotTracker);
921
922 // We need to process each line of the output separately, so split
923 // single-string plain-text dump.
924 SmallVector<StringRef, 0> Lines;
925 StringRef(Str).rtrim('\n').split(Lines, "\n");
926
927 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
928 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
929 };
930
931 // Don't need the "+" after the last line.
932 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
933 EmitLine(Line, " +\n");
934 EmitLine(Lines.back(), "\n");
935
936 bumpIndent(-1);
937 OS << Indent << "]\n";
938
939 dumpEdges(BasicBlock);
940 }
941
dumpRegion(const VPRegionBlock * Region)942 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
943 OS << Indent << "subgraph " << getUID(Region) << " {\n";
944 bumpIndent(1);
945 OS << Indent << "fontname=Courier\n"
946 << Indent << "label=\""
947 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
948 << DOT::EscapeString(Region->getName()) << "\"\n";
949 // Dump the blocks of the region.
950 assert(Region->getEntry() && "Region contains no inner blocks.");
951 for (const VPBlockBase *Block : depth_first(Region->getEntry()))
952 dumpBlock(Block);
953 bumpIndent(-1);
954 OS << Indent << "}\n";
955 dumpEdges(Region);
956 }
957
print(raw_ostream & O) const958 void VPlanIngredient::print(raw_ostream &O) const {
959 if (auto *Inst = dyn_cast<Instruction>(V)) {
960 if (!Inst->getType()->isVoidTy()) {
961 Inst->printAsOperand(O, false);
962 O << " = ";
963 }
964 O << Inst->getOpcodeName() << " ";
965 unsigned E = Inst->getNumOperands();
966 if (E > 0) {
967 Inst->getOperand(0)->printAsOperand(O, false);
968 for (unsigned I = 1; I < E; ++I)
969 Inst->getOperand(I)->printAsOperand(O << ", ", false);
970 }
971 } else // !Inst
972 V->printAsOperand(O, false);
973 }
974
975 #endif
976
977 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
978
replaceAllUsesWith(VPValue * New)979 void VPValue::replaceAllUsesWith(VPValue *New) {
980 for (unsigned J = 0; J < getNumUsers();) {
981 VPUser *User = Users[J];
982 unsigned NumUsers = getNumUsers();
983 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
984 if (User->getOperand(I) == this)
985 User->setOperand(I, New);
986 // If a user got removed after updating the current user, the next user to
987 // update will be moved to the current position, so we only need to
988 // increment the index if the number of users did not change.
989 if (NumUsers == getNumUsers())
990 J++;
991 }
992 }
993
994 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
printAsOperand(raw_ostream & OS,VPSlotTracker & Tracker) const995 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
996 if (const Value *UV = getUnderlyingValue()) {
997 OS << "ir<";
998 UV->printAsOperand(OS, false);
999 OS << ">";
1000 return;
1001 }
1002
1003 unsigned Slot = Tracker.getSlot(this);
1004 if (Slot == unsigned(-1))
1005 OS << "<badref>";
1006 else
1007 OS << "vp<%" << Tracker.getSlot(this) << ">";
1008 }
1009
printOperands(raw_ostream & O,VPSlotTracker & SlotTracker) const1010 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1011 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1012 Op->printAsOperand(O, SlotTracker);
1013 });
1014 }
1015 #endif
1016
visitRegion(VPRegionBlock * Region,Old2NewTy & Old2New,InterleavedAccessInfo & IAI)1017 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1018 Old2NewTy &Old2New,
1019 InterleavedAccessInfo &IAI) {
1020 ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1021 for (VPBlockBase *Base : RPOT) {
1022 visitBlock(Base, Old2New, IAI);
1023 }
1024 }
1025
visitBlock(VPBlockBase * Block,Old2NewTy & Old2New,InterleavedAccessInfo & IAI)1026 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1027 InterleavedAccessInfo &IAI) {
1028 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1029 for (VPRecipeBase &VPI : *VPBB) {
1030 if (isa<VPHeaderPHIRecipe>(&VPI))
1031 continue;
1032 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1033 auto *VPInst = cast<VPInstruction>(&VPI);
1034
1035 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1036 if (!Inst)
1037 continue;
1038 auto *IG = IAI.getInterleaveGroup(Inst);
1039 if (!IG)
1040 continue;
1041
1042 auto NewIGIter = Old2New.find(IG);
1043 if (NewIGIter == Old2New.end())
1044 Old2New[IG] = new InterleaveGroup<VPInstruction>(
1045 IG->getFactor(), IG->isReverse(), IG->getAlign());
1046
1047 if (Inst == IG->getInsertPos())
1048 Old2New[IG]->setInsertPos(VPInst);
1049
1050 InterleaveGroupMap[VPInst] = Old2New[IG];
1051 InterleaveGroupMap[VPInst]->insertMember(
1052 VPInst, IG->getIndex(Inst),
1053 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1054 : IG->getFactor()));
1055 }
1056 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1057 visitRegion(Region, Old2New, IAI);
1058 else
1059 llvm_unreachable("Unsupported kind of VPBlock.");
1060 }
1061
VPInterleavedAccessInfo(VPlan & Plan,InterleavedAccessInfo & IAI)1062 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1063 InterleavedAccessInfo &IAI) {
1064 Old2NewTy Old2New;
1065 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1066 }
1067
assignSlot(const VPValue * V)1068 void VPSlotTracker::assignSlot(const VPValue *V) {
1069 assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1070 Slots[V] = NextSlot++;
1071 }
1072
assignSlots(const VPlan & Plan)1073 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1074
1075 for (const auto &P : Plan.VPExternalDefs)
1076 assignSlot(P.second);
1077
1078 assignSlot(&Plan.VectorTripCount);
1079 if (Plan.BackedgeTakenCount)
1080 assignSlot(Plan.BackedgeTakenCount);
1081
1082 ReversePostOrderTraversal<
1083 VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1084 RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1085 Plan.getEntry()));
1086 for (const VPBasicBlock *VPBB :
1087 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1088 for (const VPRecipeBase &Recipe : *VPBB)
1089 for (VPValue *Def : Recipe.definedValues())
1090 assignSlot(Def);
1091 }
1092
onlyFirstLaneUsed(VPValue * Def)1093 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1094 return all_of(Def->users(),
1095 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1096 }
1097
getOrCreateVPValueForSCEVExpr(VPlan & Plan,const SCEV * Expr,ScalarEvolution & SE)1098 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1099 ScalarEvolution &SE) {
1100 if (auto *E = dyn_cast<SCEVConstant>(Expr))
1101 return Plan.getOrAddExternalDef(E->getValue());
1102 if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1103 return Plan.getOrAddExternalDef(E->getValue());
1104
1105 VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1106 VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
1107 Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
1108 return Step;
1109 }
1110