1*b5893f02SDimitry Andric //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
2*b5893f02SDimitry Andric //
3*b5893f02SDimitry Andric //                     The LLVM Compiler Infrastructure
4*b5893f02SDimitry Andric //
5*b5893f02SDimitry Andric // This file is distributed under the University of Illinois Open Source
6*b5893f02SDimitry Andric // License. See LICENSE.TXT for details.
7*b5893f02SDimitry Andric //
8*b5893f02SDimitry Andric //===----------------------------------------------------------------------===//
9*b5893f02SDimitry Andric /// This file implements SLP analysis based on VPlan. The analysis is based on
10*b5893f02SDimitry Andric /// the ideas described in
11*b5893f02SDimitry Andric ///
12*b5893f02SDimitry Andric ///   Look-ahead SLP: auto-vectorization in the presence of commutative
13*b5893f02SDimitry Andric ///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
14*b5893f02SDimitry Andric ///   Luís F. W. Góes
15*b5893f02SDimitry Andric ///
16*b5893f02SDimitry Andric //===----------------------------------------------------------------------===//
17*b5893f02SDimitry Andric 
18*b5893f02SDimitry Andric #include "VPlan.h"
19*b5893f02SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
20*b5893f02SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
21*b5893f02SDimitry Andric #include "llvm/ADT/SmallVector.h"
22*b5893f02SDimitry Andric #include "llvm/ADT/Twine.h"
23*b5893f02SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
24*b5893f02SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25*b5893f02SDimitry Andric #include "llvm/IR/BasicBlock.h"
26*b5893f02SDimitry Andric #include "llvm/IR/CFG.h"
27*b5893f02SDimitry Andric #include "llvm/IR/Dominators.h"
28*b5893f02SDimitry Andric #include "llvm/IR/InstrTypes.h"
29*b5893f02SDimitry Andric #include "llvm/IR/Instruction.h"
30*b5893f02SDimitry Andric #include "llvm/IR/Instructions.h"
31*b5893f02SDimitry Andric #include "llvm/IR/Type.h"
32*b5893f02SDimitry Andric #include "llvm/IR/Value.h"
33*b5893f02SDimitry Andric #include "llvm/Support/Casting.h"
34*b5893f02SDimitry Andric #include "llvm/Support/Debug.h"
35*b5893f02SDimitry Andric #include "llvm/Support/ErrorHandling.h"
36*b5893f02SDimitry Andric #include "llvm/Support/GraphWriter.h"
37*b5893f02SDimitry Andric #include "llvm/Support/raw_ostream.h"
38*b5893f02SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39*b5893f02SDimitry Andric #include <cassert>
40*b5893f02SDimitry Andric #include <iterator>
41*b5893f02SDimitry Andric #include <string>
42*b5893f02SDimitry Andric #include <vector>
43*b5893f02SDimitry Andric 
44*b5893f02SDimitry Andric using namespace llvm;
45*b5893f02SDimitry Andric 
46*b5893f02SDimitry Andric #define DEBUG_TYPE "vplan-slp"
47*b5893f02SDimitry Andric 
48*b5893f02SDimitry Andric // Number of levels to look ahead when re-ordering multi node operands.
49*b5893f02SDimitry Andric static unsigned LookaheadMaxDepth = 5;
50*b5893f02SDimitry Andric 
markFailed()51*b5893f02SDimitry Andric VPInstruction *VPlanSlp::markFailed() {
52*b5893f02SDimitry Andric   // FIXME: Currently this is used to signal we hit instructions we cannot
53*b5893f02SDimitry Andric   //        trivially SLP'ize.
54*b5893f02SDimitry Andric   CompletelySLP = false;
55*b5893f02SDimitry Andric   return nullptr;
56*b5893f02SDimitry Andric }
57*b5893f02SDimitry Andric 
addCombined(ArrayRef<VPValue * > Operands,VPInstruction * New)58*b5893f02SDimitry Andric void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
59*b5893f02SDimitry Andric   if (all_of(Operands, [](VPValue *V) {
60*b5893f02SDimitry Andric         return cast<VPInstruction>(V)->getUnderlyingInstr();
61*b5893f02SDimitry Andric       })) {
62*b5893f02SDimitry Andric     unsigned BundleSize = 0;
63*b5893f02SDimitry Andric     for (VPValue *V : Operands) {
64*b5893f02SDimitry Andric       Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
65*b5893f02SDimitry Andric       assert(!T->isVectorTy() && "Only scalar types supported for now");
66*b5893f02SDimitry Andric       BundleSize += T->getScalarSizeInBits();
67*b5893f02SDimitry Andric     }
68*b5893f02SDimitry Andric     WidestBundleBits = std::max(WidestBundleBits, BundleSize);
69*b5893f02SDimitry Andric   }
70*b5893f02SDimitry Andric 
71*b5893f02SDimitry Andric   auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
72*b5893f02SDimitry Andric   assert(Res.second &&
73*b5893f02SDimitry Andric          "Already created a combined instruction for the operand bundle");
74*b5893f02SDimitry Andric   (void)Res;
75*b5893f02SDimitry Andric }
76*b5893f02SDimitry Andric 
areVectorizable(ArrayRef<VPValue * > Operands) const77*b5893f02SDimitry Andric bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
78*b5893f02SDimitry Andric   // Currently we only support VPInstructions.
79*b5893f02SDimitry Andric   if (!all_of(Operands, [](VPValue *Op) {
80*b5893f02SDimitry Andric         return Op && isa<VPInstruction>(Op) &&
81*b5893f02SDimitry Andric                cast<VPInstruction>(Op)->getUnderlyingInstr();
82*b5893f02SDimitry Andric       })) {
83*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
84*b5893f02SDimitry Andric     return false;
85*b5893f02SDimitry Andric   }
86*b5893f02SDimitry Andric 
87*b5893f02SDimitry Andric   // Check if opcodes and type width agree for all instructions in the bundle.
88*b5893f02SDimitry Andric   // FIXME: Differing widths/opcodes can be handled by inserting additional
89*b5893f02SDimitry Andric   //        instructions.
90*b5893f02SDimitry Andric   // FIXME: Deal with non-primitive types.
91*b5893f02SDimitry Andric   const Instruction *OriginalInstr =
92*b5893f02SDimitry Andric       cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
93*b5893f02SDimitry Andric   unsigned Opcode = OriginalInstr->getOpcode();
94*b5893f02SDimitry Andric   unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
95*b5893f02SDimitry Andric   if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
96*b5893f02SDimitry Andric         const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
97*b5893f02SDimitry Andric         return I->getOpcode() == Opcode &&
98*b5893f02SDimitry Andric                I->getType()->getPrimitiveSizeInBits() == Width;
99*b5893f02SDimitry Andric       })) {
100*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
101*b5893f02SDimitry Andric     return false;
102*b5893f02SDimitry Andric   }
103*b5893f02SDimitry Andric 
104*b5893f02SDimitry Andric   // For now, all operands must be defined in the same BB.
105*b5893f02SDimitry Andric   if (any_of(Operands, [this](VPValue *Op) {
106*b5893f02SDimitry Andric         return cast<VPInstruction>(Op)->getParent() != &this->BB;
107*b5893f02SDimitry Andric       })) {
108*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
109*b5893f02SDimitry Andric     return false;
110*b5893f02SDimitry Andric   }
111*b5893f02SDimitry Andric 
112*b5893f02SDimitry Andric   if (any_of(Operands,
113*b5893f02SDimitry Andric              [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
114*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
115*b5893f02SDimitry Andric     return false;
116*b5893f02SDimitry Andric   }
117*b5893f02SDimitry Andric 
118*b5893f02SDimitry Andric   // For loads, check that there are no instructions writing to memory in
119*b5893f02SDimitry Andric   // between them.
120*b5893f02SDimitry Andric   // TODO: we only have to forbid instructions writing to memory that could
121*b5893f02SDimitry Andric   //       interfere with any of the loads in the bundle
122*b5893f02SDimitry Andric   if (Opcode == Instruction::Load) {
123*b5893f02SDimitry Andric     unsigned LoadsSeen = 0;
124*b5893f02SDimitry Andric     VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
125*b5893f02SDimitry Andric     for (auto &I : *Parent) {
126*b5893f02SDimitry Andric       auto *VPI = cast<VPInstruction>(&I);
127*b5893f02SDimitry Andric       if (VPI->getOpcode() == Instruction::Load &&
128*b5893f02SDimitry Andric           std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
129*b5893f02SDimitry Andric         LoadsSeen++;
130*b5893f02SDimitry Andric 
131*b5893f02SDimitry Andric       if (LoadsSeen == Operands.size())
132*b5893f02SDimitry Andric         break;
133*b5893f02SDimitry Andric       if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
134*b5893f02SDimitry Andric         LLVM_DEBUG(
135*b5893f02SDimitry Andric             dbgs() << "VPSLP: instruction modifying memory between loads\n");
136*b5893f02SDimitry Andric         return false;
137*b5893f02SDimitry Andric       }
138*b5893f02SDimitry Andric     }
139*b5893f02SDimitry Andric 
140*b5893f02SDimitry Andric     if (!all_of(Operands, [](VPValue *Op) {
141*b5893f02SDimitry Andric           return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
142*b5893f02SDimitry Andric               ->isSimple();
143*b5893f02SDimitry Andric         })) {
144*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
145*b5893f02SDimitry Andric       return false;
146*b5893f02SDimitry Andric     }
147*b5893f02SDimitry Andric   }
148*b5893f02SDimitry Andric 
149*b5893f02SDimitry Andric   if (Opcode == Instruction::Store)
150*b5893f02SDimitry Andric     if (!all_of(Operands, [](VPValue *Op) {
151*b5893f02SDimitry Andric           return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
152*b5893f02SDimitry Andric               ->isSimple();
153*b5893f02SDimitry Andric         })) {
154*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
155*b5893f02SDimitry Andric       return false;
156*b5893f02SDimitry Andric     }
157*b5893f02SDimitry Andric 
158*b5893f02SDimitry Andric   return true;
159*b5893f02SDimitry Andric }
160*b5893f02SDimitry Andric 
getOperands(ArrayRef<VPValue * > Values,unsigned OperandIndex)161*b5893f02SDimitry Andric static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
162*b5893f02SDimitry Andric                                              unsigned OperandIndex) {
163*b5893f02SDimitry Andric   SmallVector<VPValue *, 4> Operands;
164*b5893f02SDimitry Andric   for (VPValue *V : Values) {
165*b5893f02SDimitry Andric     auto *U = cast<VPUser>(V);
166*b5893f02SDimitry Andric     Operands.push_back(U->getOperand(OperandIndex));
167*b5893f02SDimitry Andric   }
168*b5893f02SDimitry Andric   return Operands;
169*b5893f02SDimitry Andric }
170*b5893f02SDimitry Andric 
areCommutative(ArrayRef<VPValue * > Values)171*b5893f02SDimitry Andric static bool areCommutative(ArrayRef<VPValue *> Values) {
172*b5893f02SDimitry Andric   return Instruction::isCommutative(
173*b5893f02SDimitry Andric       cast<VPInstruction>(Values[0])->getOpcode());
174*b5893f02SDimitry Andric }
175*b5893f02SDimitry Andric 
176*b5893f02SDimitry Andric static SmallVector<SmallVector<VPValue *, 4>, 4>
getOperands(ArrayRef<VPValue * > Values)177*b5893f02SDimitry Andric getOperands(ArrayRef<VPValue *> Values) {
178*b5893f02SDimitry Andric   SmallVector<SmallVector<VPValue *, 4>, 4> Result;
179*b5893f02SDimitry Andric   auto *VPI = cast<VPInstruction>(Values[0]);
180*b5893f02SDimitry Andric 
181*b5893f02SDimitry Andric   switch (VPI->getOpcode()) {
182*b5893f02SDimitry Andric   case Instruction::Load:
183*b5893f02SDimitry Andric     llvm_unreachable("Loads terminate a tree, no need to get operands");
184*b5893f02SDimitry Andric   case Instruction::Store:
185*b5893f02SDimitry Andric     Result.push_back(getOperands(Values, 0));
186*b5893f02SDimitry Andric     break;
187*b5893f02SDimitry Andric   default:
188*b5893f02SDimitry Andric     for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
189*b5893f02SDimitry Andric       Result.push_back(getOperands(Values, I));
190*b5893f02SDimitry Andric     break;
191*b5893f02SDimitry Andric   }
192*b5893f02SDimitry Andric 
193*b5893f02SDimitry Andric   return Result;
194*b5893f02SDimitry Andric }
195*b5893f02SDimitry Andric 
196*b5893f02SDimitry Andric /// Returns the opcode of Values or ~0 if they do not all agree.
getOpcode(ArrayRef<VPValue * > Values)197*b5893f02SDimitry Andric static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
198*b5893f02SDimitry Andric   unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
199*b5893f02SDimitry Andric   if (any_of(Values, [Opcode](VPValue *V) {
200*b5893f02SDimitry Andric         return cast<VPInstruction>(V)->getOpcode() != Opcode;
201*b5893f02SDimitry Andric       }))
202*b5893f02SDimitry Andric     return None;
203*b5893f02SDimitry Andric   return {Opcode};
204*b5893f02SDimitry Andric }
205*b5893f02SDimitry Andric 
206*b5893f02SDimitry Andric /// Returns true if A and B access sequential memory if they are loads or
207*b5893f02SDimitry Andric /// stores or if they have identical opcodes otherwise.
areConsecutiveOrMatch(VPInstruction * A,VPInstruction * B,VPInterleavedAccessInfo & IAI)208*b5893f02SDimitry Andric static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
209*b5893f02SDimitry Andric                                   VPInterleavedAccessInfo &IAI) {
210*b5893f02SDimitry Andric   if (A->getOpcode() != B->getOpcode())
211*b5893f02SDimitry Andric     return false;
212*b5893f02SDimitry Andric 
213*b5893f02SDimitry Andric   if (A->getOpcode() != Instruction::Load &&
214*b5893f02SDimitry Andric       A->getOpcode() != Instruction::Store)
215*b5893f02SDimitry Andric     return true;
216*b5893f02SDimitry Andric   auto *GA = IAI.getInterleaveGroup(A);
217*b5893f02SDimitry Andric   auto *GB = IAI.getInterleaveGroup(B);
218*b5893f02SDimitry Andric 
219*b5893f02SDimitry Andric   return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
220*b5893f02SDimitry Andric }
221*b5893f02SDimitry Andric 
222*b5893f02SDimitry Andric /// Implements getLAScore from Listing 7 in the paper.
223*b5893f02SDimitry Andric /// Traverses and compares operands of V1 and V2 to MaxLevel.
getLAScore(VPValue * V1,VPValue * V2,unsigned MaxLevel,VPInterleavedAccessInfo & IAI)224*b5893f02SDimitry Andric static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
225*b5893f02SDimitry Andric                            VPInterleavedAccessInfo &IAI) {
226*b5893f02SDimitry Andric   if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
227*b5893f02SDimitry Andric     return 0;
228*b5893f02SDimitry Andric 
229*b5893f02SDimitry Andric   if (MaxLevel == 0)
230*b5893f02SDimitry Andric     return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
231*b5893f02SDimitry Andric                                            cast<VPInstruction>(V2), IAI);
232*b5893f02SDimitry Andric 
233*b5893f02SDimitry Andric   unsigned Score = 0;
234*b5893f02SDimitry Andric   for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
235*b5893f02SDimitry Andric     for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
236*b5893f02SDimitry Andric       Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
237*b5893f02SDimitry Andric                           cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
238*b5893f02SDimitry Andric   return Score;
239*b5893f02SDimitry Andric }
240*b5893f02SDimitry Andric 
241*b5893f02SDimitry Andric std::pair<VPlanSlp::OpMode, VPValue *>
getBest(OpMode Mode,VPValue * Last,SmallPtrSetImpl<VPValue * > & Candidates,VPInterleavedAccessInfo & IAI)242*b5893f02SDimitry Andric VPlanSlp::getBest(OpMode Mode, VPValue *Last,
243*b5893f02SDimitry Andric                   SmallPtrSetImpl<VPValue *> &Candidates,
244*b5893f02SDimitry Andric                   VPInterleavedAccessInfo &IAI) {
245*b5893f02SDimitry Andric   assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
246*b5893f02SDimitry Andric          "Currently we only handle load and commutative opcodes");
247*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "      getBest\n");
248*b5893f02SDimitry Andric 
249*b5893f02SDimitry Andric   SmallVector<VPValue *, 4> BestCandidates;
250*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "        Candidates  for "
251*b5893f02SDimitry Andric                     << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
252*b5893f02SDimitry Andric   for (auto *Candidate : Candidates) {
253*b5893f02SDimitry Andric     auto *LastI = cast<VPInstruction>(Last);
254*b5893f02SDimitry Andric     auto *CandidateI = cast<VPInstruction>(Candidate);
255*b5893f02SDimitry Andric     if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
256*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
257*b5893f02SDimitry Andric                         << " ");
258*b5893f02SDimitry Andric       BestCandidates.push_back(Candidate);
259*b5893f02SDimitry Andric     }
260*b5893f02SDimitry Andric   }
261*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "\n");
262*b5893f02SDimitry Andric 
263*b5893f02SDimitry Andric   if (BestCandidates.empty())
264*b5893f02SDimitry Andric     return {OpMode::Failed, nullptr};
265*b5893f02SDimitry Andric 
266*b5893f02SDimitry Andric   if (BestCandidates.size() == 1)
267*b5893f02SDimitry Andric     return {Mode, BestCandidates[0]};
268*b5893f02SDimitry Andric 
269*b5893f02SDimitry Andric   VPValue *Best = nullptr;
270*b5893f02SDimitry Andric   unsigned BestScore = 0;
271*b5893f02SDimitry Andric   for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
272*b5893f02SDimitry Andric     unsigned PrevScore = ~0u;
273*b5893f02SDimitry Andric     bool AllSame = true;
274*b5893f02SDimitry Andric 
275*b5893f02SDimitry Andric     // FIXME: Avoid visiting the same operands multiple times.
276*b5893f02SDimitry Andric     for (auto *Candidate : BestCandidates) {
277*b5893f02SDimitry Andric       unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
278*b5893f02SDimitry Andric       if (PrevScore == ~0u)
279*b5893f02SDimitry Andric         PrevScore = Score;
280*b5893f02SDimitry Andric       if (PrevScore != Score)
281*b5893f02SDimitry Andric         AllSame = false;
282*b5893f02SDimitry Andric       PrevScore = Score;
283*b5893f02SDimitry Andric 
284*b5893f02SDimitry Andric       if (Score > BestScore) {
285*b5893f02SDimitry Andric         BestScore = Score;
286*b5893f02SDimitry Andric         Best = Candidate;
287*b5893f02SDimitry Andric       }
288*b5893f02SDimitry Andric     }
289*b5893f02SDimitry Andric     if (!AllSame)
290*b5893f02SDimitry Andric       break;
291*b5893f02SDimitry Andric   }
292*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "Found best "
293*b5893f02SDimitry Andric                     << *cast<VPInstruction>(Best)->getUnderlyingInstr()
294*b5893f02SDimitry Andric                     << "\n");
295*b5893f02SDimitry Andric   Candidates.erase(Best);
296*b5893f02SDimitry Andric 
297*b5893f02SDimitry Andric   return {Mode, Best};
298*b5893f02SDimitry Andric }
299*b5893f02SDimitry Andric 
reorderMultiNodeOps()300*b5893f02SDimitry Andric SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
301*b5893f02SDimitry Andric   SmallVector<MultiNodeOpTy, 4> FinalOrder;
302*b5893f02SDimitry Andric   SmallVector<OpMode, 4> Mode;
303*b5893f02SDimitry Andric   FinalOrder.reserve(MultiNodeOps.size());
304*b5893f02SDimitry Andric   Mode.reserve(MultiNodeOps.size());
305*b5893f02SDimitry Andric 
306*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "Reordering multinode\n");
307*b5893f02SDimitry Andric 
308*b5893f02SDimitry Andric   for (auto &Operands : MultiNodeOps) {
309*b5893f02SDimitry Andric     FinalOrder.push_back({Operands.first, {Operands.second[0]}});
310*b5893f02SDimitry Andric     if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
311*b5893f02SDimitry Andric         Instruction::Load)
312*b5893f02SDimitry Andric       Mode.push_back(OpMode::Load);
313*b5893f02SDimitry Andric     else
314*b5893f02SDimitry Andric       Mode.push_back(OpMode::Opcode);
315*b5893f02SDimitry Andric   }
316*b5893f02SDimitry Andric 
317*b5893f02SDimitry Andric   for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
318*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "  Finding best value for lane " << Lane << "\n");
319*b5893f02SDimitry Andric     SmallPtrSet<VPValue *, 4> Candidates;
320*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "  Candidates  ");
321*b5893f02SDimitry Andric     for (auto Ops : MultiNodeOps) {
322*b5893f02SDimitry Andric       LLVM_DEBUG(
323*b5893f02SDimitry Andric           dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
324*b5893f02SDimitry Andric                  << " ");
325*b5893f02SDimitry Andric       Candidates.insert(Ops.second[Lane]);
326*b5893f02SDimitry Andric     }
327*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "\n");
328*b5893f02SDimitry Andric 
329*b5893f02SDimitry Andric     for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
330*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "  Checking " << Op << "\n");
331*b5893f02SDimitry Andric       if (Mode[Op] == OpMode::Failed)
332*b5893f02SDimitry Andric         continue;
333*b5893f02SDimitry Andric 
334*b5893f02SDimitry Andric       VPValue *Last = FinalOrder[Op].second[Lane - 1];
335*b5893f02SDimitry Andric       std::pair<OpMode, VPValue *> Res =
336*b5893f02SDimitry Andric           getBest(Mode[Op], Last, Candidates, IAI);
337*b5893f02SDimitry Andric       if (Res.second)
338*b5893f02SDimitry Andric         FinalOrder[Op].second.push_back(Res.second);
339*b5893f02SDimitry Andric       else
340*b5893f02SDimitry Andric         // TODO: handle this case
341*b5893f02SDimitry Andric         FinalOrder[Op].second.push_back(markFailed());
342*b5893f02SDimitry Andric     }
343*b5893f02SDimitry Andric   }
344*b5893f02SDimitry Andric 
345*b5893f02SDimitry Andric   return FinalOrder;
346*b5893f02SDimitry Andric }
347*b5893f02SDimitry Andric 
dumpBundle(ArrayRef<VPValue * > Values)348*b5893f02SDimitry Andric void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
349*b5893f02SDimitry Andric   dbgs() << " Ops: ";
350*b5893f02SDimitry Andric   for (auto Op : Values)
351*b5893f02SDimitry Andric     if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
352*b5893f02SDimitry Andric       dbgs() << *Instr << " | ";
353*b5893f02SDimitry Andric     else
354*b5893f02SDimitry Andric       dbgs() << " nullptr | ";
355*b5893f02SDimitry Andric   dbgs() << "\n";
356*b5893f02SDimitry Andric }
357*b5893f02SDimitry Andric 
buildGraph(ArrayRef<VPValue * > Values)358*b5893f02SDimitry Andric VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
359*b5893f02SDimitry Andric   assert(!Values.empty() && "Need some operands!");
360*b5893f02SDimitry Andric 
361*b5893f02SDimitry Andric   // If we already visited this instruction bundle, re-use the existing node
362*b5893f02SDimitry Andric   auto I = BundleToCombined.find(to_vector<4>(Values));
363*b5893f02SDimitry Andric   if (I != BundleToCombined.end()) {
364*b5893f02SDimitry Andric #ifndef NDEBUG
365*b5893f02SDimitry Andric     // Check that the resulting graph is a tree. If we re-use a node, this means
366*b5893f02SDimitry Andric     // its values have multiple users. We only allow this, if all users of each
367*b5893f02SDimitry Andric     // value are the same instruction.
368*b5893f02SDimitry Andric     for (auto *V : Values) {
369*b5893f02SDimitry Andric       auto UI = V->user_begin();
370*b5893f02SDimitry Andric       auto *FirstUser = *UI++;
371*b5893f02SDimitry Andric       while (UI != V->user_end()) {
372*b5893f02SDimitry Andric         assert(*UI == FirstUser && "Currently we only support SLP trees.");
373*b5893f02SDimitry Andric         UI++;
374*b5893f02SDimitry Andric       }
375*b5893f02SDimitry Andric     }
376*b5893f02SDimitry Andric #endif
377*b5893f02SDimitry Andric     return I->second;
378*b5893f02SDimitry Andric   }
379*b5893f02SDimitry Andric 
380*b5893f02SDimitry Andric   // Dump inputs
381*b5893f02SDimitry Andric   LLVM_DEBUG({
382*b5893f02SDimitry Andric     dbgs() << "buildGraph: ";
383*b5893f02SDimitry Andric     dumpBundle(Values);
384*b5893f02SDimitry Andric   });
385*b5893f02SDimitry Andric 
386*b5893f02SDimitry Andric   if (!areVectorizable(Values))
387*b5893f02SDimitry Andric     return markFailed();
388*b5893f02SDimitry Andric 
389*b5893f02SDimitry Andric   assert(getOpcode(Values) && "Opcodes for all values must match");
390*b5893f02SDimitry Andric   unsigned ValuesOpcode = getOpcode(Values).getValue();
391*b5893f02SDimitry Andric 
392*b5893f02SDimitry Andric   SmallVector<VPValue *, 4> CombinedOperands;
393*b5893f02SDimitry Andric   if (areCommutative(Values)) {
394*b5893f02SDimitry Andric     bool MultiNodeRoot = !MultiNodeActive;
395*b5893f02SDimitry Andric     MultiNodeActive = true;
396*b5893f02SDimitry Andric     for (auto &Operands : getOperands(Values)) {
397*b5893f02SDimitry Andric       LLVM_DEBUG({
398*b5893f02SDimitry Andric         dbgs() << "  Visiting Commutative";
399*b5893f02SDimitry Andric         dumpBundle(Operands);
400*b5893f02SDimitry Andric       });
401*b5893f02SDimitry Andric 
402*b5893f02SDimitry Andric       auto OperandsOpcode = getOpcode(Operands);
403*b5893f02SDimitry Andric       if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
404*b5893f02SDimitry Andric         LLVM_DEBUG(dbgs() << "    Same opcode, continue building\n");
405*b5893f02SDimitry Andric         CombinedOperands.push_back(buildGraph(Operands));
406*b5893f02SDimitry Andric       } else {
407*b5893f02SDimitry Andric         LLVM_DEBUG(dbgs() << "    Adding multinode Ops\n");
408*b5893f02SDimitry Andric         // Create dummy VPInstruction, which will we replace later by the
409*b5893f02SDimitry Andric         // re-ordered operand.
410*b5893f02SDimitry Andric         VPInstruction *Op = new VPInstruction(0, {});
411*b5893f02SDimitry Andric         CombinedOperands.push_back(Op);
412*b5893f02SDimitry Andric         MultiNodeOps.emplace_back(Op, Operands);
413*b5893f02SDimitry Andric       }
414*b5893f02SDimitry Andric     }
415*b5893f02SDimitry Andric 
416*b5893f02SDimitry Andric     if (MultiNodeRoot) {
417*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "Reorder \n");
418*b5893f02SDimitry Andric       MultiNodeActive = false;
419*b5893f02SDimitry Andric 
420*b5893f02SDimitry Andric       auto FinalOrder = reorderMultiNodeOps();
421*b5893f02SDimitry Andric 
422*b5893f02SDimitry Andric       MultiNodeOps.clear();
423*b5893f02SDimitry Andric       for (auto &Ops : FinalOrder) {
424*b5893f02SDimitry Andric         VPInstruction *NewOp = buildGraph(Ops.second);
425*b5893f02SDimitry Andric         Ops.first->replaceAllUsesWith(NewOp);
426*b5893f02SDimitry Andric         for (unsigned i = 0; i < CombinedOperands.size(); i++)
427*b5893f02SDimitry Andric           if (CombinedOperands[i] == Ops.first)
428*b5893f02SDimitry Andric             CombinedOperands[i] = NewOp;
429*b5893f02SDimitry Andric         delete Ops.first;
430*b5893f02SDimitry Andric         Ops.first = NewOp;
431*b5893f02SDimitry Andric       }
432*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "Found final order\n");
433*b5893f02SDimitry Andric     }
434*b5893f02SDimitry Andric   } else {
435*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "  NonCommuntative\n");
436*b5893f02SDimitry Andric     if (ValuesOpcode == Instruction::Load)
437*b5893f02SDimitry Andric       for (VPValue *V : Values)
438*b5893f02SDimitry Andric         CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
439*b5893f02SDimitry Andric     else
440*b5893f02SDimitry Andric       for (auto &Operands : getOperands(Values))
441*b5893f02SDimitry Andric         CombinedOperands.push_back(buildGraph(Operands));
442*b5893f02SDimitry Andric   }
443*b5893f02SDimitry Andric 
444*b5893f02SDimitry Andric   unsigned Opcode;
445*b5893f02SDimitry Andric   switch (ValuesOpcode) {
446*b5893f02SDimitry Andric   case Instruction::Load:
447*b5893f02SDimitry Andric     Opcode = VPInstruction::SLPLoad;
448*b5893f02SDimitry Andric     break;
449*b5893f02SDimitry Andric   case Instruction::Store:
450*b5893f02SDimitry Andric     Opcode = VPInstruction::SLPStore;
451*b5893f02SDimitry Andric     break;
452*b5893f02SDimitry Andric   default:
453*b5893f02SDimitry Andric     Opcode = ValuesOpcode;
454*b5893f02SDimitry Andric     break;
455*b5893f02SDimitry Andric   }
456*b5893f02SDimitry Andric 
457*b5893f02SDimitry Andric   if (!CompletelySLP)
458*b5893f02SDimitry Andric     return markFailed();
459*b5893f02SDimitry Andric 
460*b5893f02SDimitry Andric   assert(CombinedOperands.size() > 0 && "Need more some operands");
461*b5893f02SDimitry Andric   auto *VPI = new VPInstruction(Opcode, CombinedOperands);
462*b5893f02SDimitry Andric   VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
463*b5893f02SDimitry Andric 
464*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
465*b5893f02SDimitry Andric              cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
466*b5893f02SDimitry Andric   addCombined(Values, VPI);
467*b5893f02SDimitry Andric   return VPI;
468*b5893f02SDimitry Andric }
469