1e60b36cfSFlorian Hahn //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2e60b36cfSFlorian Hahn //
3e60b36cfSFlorian Hahn // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e60b36cfSFlorian Hahn // See https://llvm.org/LICENSE.txt for license information.
5e60b36cfSFlorian Hahn // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e60b36cfSFlorian Hahn //
7e60b36cfSFlorian Hahn //===----------------------------------------------------------------------===//
8e60b36cfSFlorian Hahn ///
9e60b36cfSFlorian Hahn /// \file
10e60b36cfSFlorian Hahn /// This file implements a set of utility VPlan to VPlan transformations.
11e60b36cfSFlorian Hahn ///
12e60b36cfSFlorian Hahn //===----------------------------------------------------------------------===//
13e60b36cfSFlorian Hahn 
14e60b36cfSFlorian Hahn #include "VPlanTransforms.h"
15e60b36cfSFlorian Hahn #include "llvm/ADT/PostOrderIterator.h"
16e60b36cfSFlorian Hahn 
17e60b36cfSFlorian Hahn using namespace llvm;
18e60b36cfSFlorian Hahn 
19e60b36cfSFlorian Hahn void VPlanTransforms::VPInstructionsToVPRecipes(
20e60b36cfSFlorian Hahn     Loop *OrigLoop, VPlanPtr &Plan,
21d0d38df0SDavid Green     LoopVectorizationLegality::InductionList &Inductions,
220de8aeaeSMauri Mustonen     SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) {
23e60b36cfSFlorian Hahn 
24e60b36cfSFlorian Hahn   auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
25e60b36cfSFlorian Hahn   ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
26e60b36cfSFlorian Hahn 
27e60b36cfSFlorian Hahn   for (VPBlockBase *Base : RPOT) {
28e60b36cfSFlorian Hahn     // Do not widen instructions in pre-header and exit blocks.
29e60b36cfSFlorian Hahn     if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0)
30e60b36cfSFlorian Hahn       continue;
31e60b36cfSFlorian Hahn 
32e60b36cfSFlorian Hahn     VPBasicBlock *VPBB = Base->getEntryBasicBlock();
33e60b36cfSFlorian Hahn     // Introduce each ingredient into VPlan.
34e60b36cfSFlorian Hahn     for (auto I = VPBB->begin(), E = VPBB->end(); I != E;) {
35e60b36cfSFlorian Hahn       VPRecipeBase *Ingredient = &*I++;
36a0e1313cSFlorian Hahn       VPValue *VPV = Ingredient->getVPSingleValue();
3715a74b64SFlorian Hahn       Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
38e60b36cfSFlorian Hahn       if (DeadInstructions.count(Inst)) {
3976afbf60SFlorian Hahn         VPValue DummyValue;
4015a74b64SFlorian Hahn         VPV->replaceAllUsesWith(&DummyValue);
41e60b36cfSFlorian Hahn         Ingredient->eraseFromParent();
42e60b36cfSFlorian Hahn         continue;
43e60b36cfSFlorian Hahn       }
44e60b36cfSFlorian Hahn 
45e60b36cfSFlorian Hahn       VPRecipeBase *NewRecipe = nullptr;
4615a74b64SFlorian Hahn       if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(Ingredient)) {
4715a74b64SFlorian Hahn         auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
48d0d38df0SDavid Green         InductionDescriptor II = Inductions.lookup(Phi);
49e60b36cfSFlorian Hahn         if (II.getKind() == InductionDescriptor::IK_IntInduction ||
50e60b36cfSFlorian Hahn             II.getKind() == InductionDescriptor::IK_FpInduction) {
51816dba48SFlorian Hahn           VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
52daaa0e35SFlorian Hahn           NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, nullptr);
5315a74b64SFlorian Hahn         } else {
5415a74b64SFlorian Hahn           Plan->addVPValue(Phi, VPPhi);
5515a74b64SFlorian Hahn           continue;
5615a74b64SFlorian Hahn         }
5715a74b64SFlorian Hahn       } else {
5815a74b64SFlorian Hahn         assert(isa<VPInstruction>(Ingredient) &&
5915a74b64SFlorian Hahn                "only VPInstructions expected here");
6015a74b64SFlorian Hahn         assert(!isa<PHINode>(Inst) && "phis should be handled above");
6115a74b64SFlorian Hahn         // Create VPWidenMemoryInstructionRecipe for loads and stores.
6215a74b64SFlorian Hahn         if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
6315a74b64SFlorian Hahn           NewRecipe = new VPWidenMemoryInstructionRecipe(
6415a74b64SFlorian Hahn               *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
6515a74b64SFlorian Hahn               nullptr /*Mask*/);
6615a74b64SFlorian Hahn         } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
6715a74b64SFlorian Hahn           NewRecipe = new VPWidenMemoryInstructionRecipe(
6815a74b64SFlorian Hahn               *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
6915a74b64SFlorian Hahn               Plan->getOrAddVPValue(Store->getValueOperand()),
7015a74b64SFlorian Hahn               nullptr /*Mask*/);
71e60b36cfSFlorian Hahn         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
72c0cdba72SFlorian Hahn           NewRecipe = new VPWidenGEPRecipe(
73c0cdba72SFlorian Hahn               GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
74494b5ba3SMauri Mustonen         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
75494b5ba3SMauri Mustonen           NewRecipe = new VPWidenCallRecipe(
76494b5ba3SMauri Mustonen               *CI, Plan->mapToVPValues(CI->arg_operands()));
770de8aeaeSMauri Mustonen         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
780de8aeaeSMauri Mustonen           bool InvariantCond =
790de8aeaeSMauri Mustonen               SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
800de8aeaeSMauri Mustonen           NewRecipe = new VPWidenSelectRecipe(
810de8aeaeSMauri Mustonen               *SI, Plan->mapToVPValues(SI->operands()), InvariantCond);
8215a74b64SFlorian Hahn         } else {
83e8937985SFlorian Hahn           NewRecipe =
84e8937985SFlorian Hahn               new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
8515a74b64SFlorian Hahn         }
8615a74b64SFlorian Hahn       }
87e60b36cfSFlorian Hahn 
88e60b36cfSFlorian Hahn       NewRecipe->insertBefore(Ingredient);
8976afbf60SFlorian Hahn       if (NewRecipe->getNumDefinedValues() == 1)
90a0e1313cSFlorian Hahn         VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
9176afbf60SFlorian Hahn       else
9276afbf60SFlorian Hahn         assert(NewRecipe->getNumDefinedValues() == 0 &&
9376afbf60SFlorian Hahn                "Only recpies with zero or one defined values expected");
94e60b36cfSFlorian Hahn       Ingredient->eraseFromParent();
9554a14c26SFlorian Hahn       Plan->removeVPValueFor(Inst);
9654a14c26SFlorian Hahn       for (auto *Def : NewRecipe->definedValues()) {
9754a14c26SFlorian Hahn         Plan->addVPValue(Inst, Def);
9854a14c26SFlorian Hahn       }
99e60b36cfSFlorian Hahn     }
100e60b36cfSFlorian Hahn   }
101e60b36cfSFlorian Hahn }
10265d3dd7cSFlorian Hahn 
10365d3dd7cSFlorian Hahn bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
10465d3dd7cSFlorian Hahn   auto Iter = depth_first(
10565d3dd7cSFlorian Hahn       VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()));
10665d3dd7cSFlorian Hahn   bool Changed = false;
10765d3dd7cSFlorian Hahn   // First, collect the operands of all predicated replicate recipes as seeds
10865d3dd7cSFlorian Hahn   // for sinking.
10965d3dd7cSFlorian Hahn   SetVector<VPValue *> WorkList;
11065d3dd7cSFlorian Hahn   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
11165d3dd7cSFlorian Hahn     for (auto &Recipe : *VPBB) {
11265d3dd7cSFlorian Hahn       auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
11365d3dd7cSFlorian Hahn       if (!RepR || !RepR->isPredicated())
11465d3dd7cSFlorian Hahn         continue;
11565d3dd7cSFlorian Hahn       WorkList.insert(RepR->op_begin(), RepR->op_end());
11665d3dd7cSFlorian Hahn     }
11765d3dd7cSFlorian Hahn   }
11865d3dd7cSFlorian Hahn 
11965d3dd7cSFlorian Hahn   // Try to sink each replicate recipe in the worklist.
12065d3dd7cSFlorian Hahn   while (!WorkList.empty()) {
12165d3dd7cSFlorian Hahn     auto *C = WorkList.pop_back_val();
12265d3dd7cSFlorian Hahn     auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def);
12338641ddfSFlorian Hahn     if (!SinkCandidate || SinkCandidate->isUniform())
12465d3dd7cSFlorian Hahn       continue;
12565d3dd7cSFlorian Hahn 
12665d3dd7cSFlorian Hahn     // All users of SinkCandidate must be in the same block in order to perform
12765d3dd7cSFlorian Hahn     // sinking. Therefore the destination block for sinking must match the block
12865d3dd7cSFlorian Hahn     // containing the first user.
12965d3dd7cSFlorian Hahn     auto *FirstUser = dyn_cast<VPRecipeBase>(*SinkCandidate->user_begin());
13065d3dd7cSFlorian Hahn     if (!FirstUser)
13165d3dd7cSFlorian Hahn       continue;
13265d3dd7cSFlorian Hahn     VPBasicBlock *SinkTo = FirstUser->getParent();
13365d3dd7cSFlorian Hahn     if (SinkCandidate->getParent() == SinkTo ||
13465d3dd7cSFlorian Hahn         SinkCandidate->mayHaveSideEffects() ||
13565d3dd7cSFlorian Hahn         SinkCandidate->mayReadOrWriteMemory())
13665d3dd7cSFlorian Hahn       continue;
13765d3dd7cSFlorian Hahn 
13865d3dd7cSFlorian Hahn     // All recipe users of the sink candidate must be in the same block SinkTo.
13965d3dd7cSFlorian Hahn     if (any_of(SinkCandidate->users(), [SinkTo](VPUser *U) {
14065d3dd7cSFlorian Hahn           auto *UI = dyn_cast<VPRecipeBase>(U);
14165d3dd7cSFlorian Hahn           return !UI || UI->getParent() != SinkTo;
14265d3dd7cSFlorian Hahn         }))
14365d3dd7cSFlorian Hahn       continue;
14465d3dd7cSFlorian Hahn 
14565d3dd7cSFlorian Hahn     SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
14665d3dd7cSFlorian Hahn     WorkList.insert(SinkCandidate->op_begin(), SinkCandidate->op_end());
14765d3dd7cSFlorian Hahn     Changed = true;
14865d3dd7cSFlorian Hahn   }
14965d3dd7cSFlorian Hahn   return Changed;
15065d3dd7cSFlorian Hahn }
151*80aa7e14SFlorian Hahn 
152*80aa7e14SFlorian Hahn /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
153*80aa7e14SFlorian Hahn /// the mask.
154*80aa7e14SFlorian Hahn VPValue *getPredicatedMask(VPRegionBlock *R) {
155*80aa7e14SFlorian Hahn   auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
156*80aa7e14SFlorian Hahn   if (!EntryBB || EntryBB->size() != 1 ||
157*80aa7e14SFlorian Hahn       !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
158*80aa7e14SFlorian Hahn     return nullptr;
159*80aa7e14SFlorian Hahn 
160*80aa7e14SFlorian Hahn   return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
161*80aa7e14SFlorian Hahn }
162*80aa7e14SFlorian Hahn 
163*80aa7e14SFlorian Hahn /// If \p R is a triangle region, return the 'then' block of the triangle.
164*80aa7e14SFlorian Hahn static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
165*80aa7e14SFlorian Hahn   auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
166*80aa7e14SFlorian Hahn   if (EntryBB->getNumSuccessors() != 2)
167*80aa7e14SFlorian Hahn     return nullptr;
168*80aa7e14SFlorian Hahn 
169*80aa7e14SFlorian Hahn   auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
170*80aa7e14SFlorian Hahn   auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
171*80aa7e14SFlorian Hahn   if (!Succ0 || !Succ1)
172*80aa7e14SFlorian Hahn     return nullptr;
173*80aa7e14SFlorian Hahn 
174*80aa7e14SFlorian Hahn   if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
175*80aa7e14SFlorian Hahn     return nullptr;
176*80aa7e14SFlorian Hahn   if (Succ0->getSingleSuccessor() == Succ1)
177*80aa7e14SFlorian Hahn     return Succ0;
178*80aa7e14SFlorian Hahn   if (Succ1->getSingleSuccessor() == Succ0)
179*80aa7e14SFlorian Hahn     return Succ1;
180*80aa7e14SFlorian Hahn   return nullptr;
181*80aa7e14SFlorian Hahn }
182*80aa7e14SFlorian Hahn 
183*80aa7e14SFlorian Hahn bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
184*80aa7e14SFlorian Hahn   SetVector<VPRegionBlock *> DeletedRegions;
185*80aa7e14SFlorian Hahn   bool Changed = false;
186*80aa7e14SFlorian Hahn 
187*80aa7e14SFlorian Hahn   // Collect region blocks to process up-front, to avoid iterator invalidation
188*80aa7e14SFlorian Hahn   // issues while merging regions.
189*80aa7e14SFlorian Hahn   SmallVector<VPRegionBlock *, 8> CandidateRegions(
190*80aa7e14SFlorian Hahn       VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
191*80aa7e14SFlorian Hahn           VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()))));
192*80aa7e14SFlorian Hahn 
193*80aa7e14SFlorian Hahn   // Check if Base is a predicated triangle, followed by an empty block,
194*80aa7e14SFlorian Hahn   // followed by another predicate triangle. If that's the case, move the
195*80aa7e14SFlorian Hahn   // recipes from the first to the second triangle.
196*80aa7e14SFlorian Hahn   for (VPRegionBlock *Region1 : CandidateRegions) {
197*80aa7e14SFlorian Hahn     if (DeletedRegions.contains(Region1))
198*80aa7e14SFlorian Hahn       continue;
199*80aa7e14SFlorian Hahn     auto *MiddleBasicBlock =
200*80aa7e14SFlorian Hahn         dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
201*80aa7e14SFlorian Hahn     if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
202*80aa7e14SFlorian Hahn       continue;
203*80aa7e14SFlorian Hahn 
204*80aa7e14SFlorian Hahn     auto *Region2 =
205*80aa7e14SFlorian Hahn         dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
206*80aa7e14SFlorian Hahn     if (!Region2)
207*80aa7e14SFlorian Hahn       continue;
208*80aa7e14SFlorian Hahn 
209*80aa7e14SFlorian Hahn     VPValue *Mask1 = getPredicatedMask(Region1);
210*80aa7e14SFlorian Hahn     VPValue *Mask2 = getPredicatedMask(Region2);
211*80aa7e14SFlorian Hahn     if (!Mask1 || Mask1 != Mask2)
212*80aa7e14SFlorian Hahn       continue;
213*80aa7e14SFlorian Hahn     VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
214*80aa7e14SFlorian Hahn     VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
215*80aa7e14SFlorian Hahn     if (!Then1 || !Then2)
216*80aa7e14SFlorian Hahn       continue;
217*80aa7e14SFlorian Hahn 
218*80aa7e14SFlorian Hahn     assert(Mask1 && Mask2 && "both region must have conditions");
219*80aa7e14SFlorian Hahn 
220*80aa7e14SFlorian Hahn     // Note: No fusion-preventing memory dependencies are expected in either
221*80aa7e14SFlorian Hahn     // region. Such dependencies should be rejected during earlier dependence
222*80aa7e14SFlorian Hahn     // checks, which guarantee accesses can be re-ordered for vectorization.
223*80aa7e14SFlorian Hahn     //
224*80aa7e14SFlorian Hahn     // If a recipe is used by a first-order recurrence phi, we cannot move it at
225*80aa7e14SFlorian Hahn     // the moment: a recipe R feeding a first order recurrence phi must allow
226*80aa7e14SFlorian Hahn     // for a *vector* shuffle to be inserted immediately after it, and therefore
227*80aa7e14SFlorian Hahn     // if R is *scalarized and predicated* it must appear last in its basic
228*80aa7e14SFlorian Hahn     // block. In addition, other recipes may need to "sink after" R, so best if
229*80aa7e14SFlorian Hahn     // R not be moved at all.
230*80aa7e14SFlorian Hahn     auto IsImmovableRecipe = [](VPRecipeBase &R) {
231*80aa7e14SFlorian Hahn       assert(R.getNumDefinedValues() <= 1 &&
232*80aa7e14SFlorian Hahn              "no multi-defs are expected in predicated blocks");
233*80aa7e14SFlorian Hahn       for (VPUser *U : R.getVPValue()->users()) {
234*80aa7e14SFlorian Hahn         auto *UI = dyn_cast<VPRecipeBase>(U);
235*80aa7e14SFlorian Hahn         if (!UI)
236*80aa7e14SFlorian Hahn           continue;
237*80aa7e14SFlorian Hahn         auto *PhiR = dyn_cast<VPWidenPHIRecipe>(UI);
238*80aa7e14SFlorian Hahn         if (PhiR && !PhiR->getRecurrenceDescriptor())
239*80aa7e14SFlorian Hahn           return true;
240*80aa7e14SFlorian Hahn       }
241*80aa7e14SFlorian Hahn       return false;
242*80aa7e14SFlorian Hahn     };
243*80aa7e14SFlorian Hahn     if (any_of(*Then1, IsImmovableRecipe))
244*80aa7e14SFlorian Hahn       continue;
245*80aa7e14SFlorian Hahn 
246*80aa7e14SFlorian Hahn     // Move recipes to the successor region.
247*80aa7e14SFlorian Hahn     for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
248*80aa7e14SFlorian Hahn       ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
249*80aa7e14SFlorian Hahn 
250*80aa7e14SFlorian Hahn     auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
251*80aa7e14SFlorian Hahn     auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
252*80aa7e14SFlorian Hahn 
253*80aa7e14SFlorian Hahn     // Move VPPredInstPHIRecipes from the merge block to the successor region's
254*80aa7e14SFlorian Hahn     // merge block. Update all users inside the successor region to use the
255*80aa7e14SFlorian Hahn     // original values.
256*80aa7e14SFlorian Hahn     for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
257*80aa7e14SFlorian Hahn       VPValue *PredInst1 =
258*80aa7e14SFlorian Hahn           cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
259*80aa7e14SFlorian Hahn       for (VPUser *U : Phi1ToMove.getVPValue()->users()) {
260*80aa7e14SFlorian Hahn         auto *UI = dyn_cast<VPRecipeBase>(U);
261*80aa7e14SFlorian Hahn         if (!UI || UI->getParent() != Then2)
262*80aa7e14SFlorian Hahn           continue;
263*80aa7e14SFlorian Hahn         for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
264*80aa7e14SFlorian Hahn           if (Phi1ToMove.getVPValue() != U->getOperand(I))
265*80aa7e14SFlorian Hahn             continue;
266*80aa7e14SFlorian Hahn           U->setOperand(I, PredInst1);
267*80aa7e14SFlorian Hahn         }
268*80aa7e14SFlorian Hahn       }
269*80aa7e14SFlorian Hahn 
270*80aa7e14SFlorian Hahn       Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
271*80aa7e14SFlorian Hahn     }
272*80aa7e14SFlorian Hahn 
273*80aa7e14SFlorian Hahn     // Finally, remove the first region.
274*80aa7e14SFlorian Hahn     for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
275*80aa7e14SFlorian Hahn       VPBlockUtils::disconnectBlocks(Pred, Region1);
276*80aa7e14SFlorian Hahn       VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
277*80aa7e14SFlorian Hahn     }
278*80aa7e14SFlorian Hahn     VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
279*80aa7e14SFlorian Hahn     DeletedRegions.insert(Region1);
280*80aa7e14SFlorian Hahn   }
281*80aa7e14SFlorian Hahn 
282*80aa7e14SFlorian Hahn   for (VPRegionBlock *ToDelete : DeletedRegions)
283*80aa7e14SFlorian Hahn     delete ToDelete;
284*80aa7e14SFlorian Hahn   return Changed;
285*80aa7e14SFlorian Hahn }
286