109e516c5SFlorian Hahn //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
209e516c5SFlorian Hahn //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609e516c5SFlorian Hahn //
709e516c5SFlorian Hahn //===----------------------------------------------------------------------===//
809e516c5SFlorian Hahn /// This file implements SLP analysis based on VPlan. The analysis is based on
909e516c5SFlorian Hahn /// the ideas described in
1009e516c5SFlorian Hahn ///
1109e516c5SFlorian Hahn ///   Look-ahead SLP: auto-vectorization in the presence of commutative
1209e516c5SFlorian Hahn ///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
1309e516c5SFlorian Hahn ///   Luís F. W. Góes
1409e516c5SFlorian Hahn ///
1509e516c5SFlorian Hahn //===----------------------------------------------------------------------===//
1609e516c5SFlorian Hahn 
1709e516c5SFlorian Hahn #include "VPlan.h"
181b89c832Sserge-sans-paille #include "VPlanValue.h"
191b89c832Sserge-sans-paille #include "llvm/ADT/DenseMap.h"
2009e516c5SFlorian Hahn #include "llvm/ADT/SmallVector.h"
2109e516c5SFlorian Hahn #include "llvm/Analysis/VectorUtils.h"
2209e516c5SFlorian Hahn #include "llvm/IR/Instruction.h"
2309e516c5SFlorian Hahn #include "llvm/IR/Instructions.h"
2409e516c5SFlorian Hahn #include "llvm/IR/Type.h"
2509e516c5SFlorian Hahn #include "llvm/IR/Value.h"
2609e516c5SFlorian Hahn #include "llvm/Support/Casting.h"
2709e516c5SFlorian Hahn #include "llvm/Support/Debug.h"
2809e516c5SFlorian Hahn #include "llvm/Support/ErrorHandling.h"
2909e516c5SFlorian Hahn #include "llvm/Support/raw_ostream.h"
30b013c58eSSimon Pilgrim #include <algorithm>
3109e516c5SFlorian Hahn #include <cassert>
32b013c58eSSimon Pilgrim #include <utility>
3309e516c5SFlorian Hahn 
3409e516c5SFlorian Hahn using namespace llvm;
3509e516c5SFlorian Hahn 
3609e516c5SFlorian Hahn #define DEBUG_TYPE "vplan-slp"
3709e516c5SFlorian Hahn 
3809e516c5SFlorian Hahn // Number of levels to look ahead when re-ordering multi node operands.
3909e516c5SFlorian Hahn static unsigned LookaheadMaxDepth = 5;
4009e516c5SFlorian Hahn 
markFailed()4109e516c5SFlorian Hahn VPInstruction *VPlanSlp::markFailed() {
4209e516c5SFlorian Hahn   // FIXME: Currently this is used to signal we hit instructions we cannot
4309e516c5SFlorian Hahn   //        trivially SLP'ize.
4409e516c5SFlorian Hahn   CompletelySLP = false;
4509e516c5SFlorian Hahn   return nullptr;
4609e516c5SFlorian Hahn }
4709e516c5SFlorian Hahn 
addCombined(ArrayRef<VPValue * > Operands,VPInstruction * New)4809e516c5SFlorian Hahn void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
4909e516c5SFlorian Hahn   if (all_of(Operands, [](VPValue *V) {
5009e516c5SFlorian Hahn         return cast<VPInstruction>(V)->getUnderlyingInstr();
5109e516c5SFlorian Hahn       })) {
5209e516c5SFlorian Hahn     unsigned BundleSize = 0;
5309e516c5SFlorian Hahn     for (VPValue *V : Operands) {
5409e516c5SFlorian Hahn       Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
5509e516c5SFlorian Hahn       assert(!T->isVectorTy() && "Only scalar types supported for now");
5609e516c5SFlorian Hahn       BundleSize += T->getScalarSizeInBits();
5709e516c5SFlorian Hahn     }
5809e516c5SFlorian Hahn     WidestBundleBits = std::max(WidestBundleBits, BundleSize);
5909e516c5SFlorian Hahn   }
6009e516c5SFlorian Hahn 
6109e516c5SFlorian Hahn   auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
6209e516c5SFlorian Hahn   assert(Res.second &&
6309e516c5SFlorian Hahn          "Already created a combined instruction for the operand bundle");
6409e516c5SFlorian Hahn   (void)Res;
6509e516c5SFlorian Hahn }
6609e516c5SFlorian Hahn 
areVectorizable(ArrayRef<VPValue * > Operands) const6709e516c5SFlorian Hahn bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
6809e516c5SFlorian Hahn   // Currently we only support VPInstructions.
6909e516c5SFlorian Hahn   if (!all_of(Operands, [](VPValue *Op) {
7009e516c5SFlorian Hahn         return Op && isa<VPInstruction>(Op) &&
7109e516c5SFlorian Hahn                cast<VPInstruction>(Op)->getUnderlyingInstr();
7209e516c5SFlorian Hahn       })) {
7309e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
7409e516c5SFlorian Hahn     return false;
7509e516c5SFlorian Hahn   }
7609e516c5SFlorian Hahn 
7709e516c5SFlorian Hahn   // Check if opcodes and type width agree for all instructions in the bundle.
7809e516c5SFlorian Hahn   // FIXME: Differing widths/opcodes can be handled by inserting additional
7909e516c5SFlorian Hahn   //        instructions.
8009e516c5SFlorian Hahn   // FIXME: Deal with non-primitive types.
8109e516c5SFlorian Hahn   const Instruction *OriginalInstr =
8209e516c5SFlorian Hahn       cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
8309e516c5SFlorian Hahn   unsigned Opcode = OriginalInstr->getOpcode();
8409e516c5SFlorian Hahn   unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
8509e516c5SFlorian Hahn   if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
8609e516c5SFlorian Hahn         const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
8709e516c5SFlorian Hahn         return I->getOpcode() == Opcode &&
8809e516c5SFlorian Hahn                I->getType()->getPrimitiveSizeInBits() == Width;
8909e516c5SFlorian Hahn       })) {
9009e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
9109e516c5SFlorian Hahn     return false;
9209e516c5SFlorian Hahn   }
9309e516c5SFlorian Hahn 
9409e516c5SFlorian Hahn   // For now, all operands must be defined in the same BB.
9509e516c5SFlorian Hahn   if (any_of(Operands, [this](VPValue *Op) {
9609e516c5SFlorian Hahn         return cast<VPInstruction>(Op)->getParent() != &this->BB;
9709e516c5SFlorian Hahn       })) {
9809e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
9909e516c5SFlorian Hahn     return false;
10009e516c5SFlorian Hahn   }
10109e516c5SFlorian Hahn 
10209e516c5SFlorian Hahn   if (any_of(Operands,
10309e516c5SFlorian Hahn              [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
10409e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
10509e516c5SFlorian Hahn     return false;
10609e516c5SFlorian Hahn   }
10709e516c5SFlorian Hahn 
10809e516c5SFlorian Hahn   // For loads, check that there are no instructions writing to memory in
10909e516c5SFlorian Hahn   // between them.
11009e516c5SFlorian Hahn   // TODO: we only have to forbid instructions writing to memory that could
11109e516c5SFlorian Hahn   //       interfere with any of the loads in the bundle
11209e516c5SFlorian Hahn   if (Opcode == Instruction::Load) {
11309e516c5SFlorian Hahn     unsigned LoadsSeen = 0;
11409e516c5SFlorian Hahn     VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
11509e516c5SFlorian Hahn     for (auto &I : *Parent) {
116c11fd0dfSFlorian Hahn       auto *VPI = dyn_cast<VPInstruction>(&I);
117c11fd0dfSFlorian Hahn       if (!VPI)
118c11fd0dfSFlorian Hahn         break;
11909e516c5SFlorian Hahn       if (VPI->getOpcode() == Instruction::Load &&
120902cbcd5SKazu Hirata           llvm::is_contained(Operands, VPI))
12109e516c5SFlorian Hahn         LoadsSeen++;
12209e516c5SFlorian Hahn 
12309e516c5SFlorian Hahn       if (LoadsSeen == Operands.size())
12409e516c5SFlorian Hahn         break;
12509e516c5SFlorian Hahn       if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
12609e516c5SFlorian Hahn         LLVM_DEBUG(
12709e516c5SFlorian Hahn             dbgs() << "VPSLP: instruction modifying memory between loads\n");
12809e516c5SFlorian Hahn         return false;
12909e516c5SFlorian Hahn       }
13009e516c5SFlorian Hahn     }
13109e516c5SFlorian Hahn 
13209e516c5SFlorian Hahn     if (!all_of(Operands, [](VPValue *Op) {
13309e516c5SFlorian Hahn           return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
13409e516c5SFlorian Hahn               ->isSimple();
13509e516c5SFlorian Hahn         })) {
13609e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
13709e516c5SFlorian Hahn       return false;
13809e516c5SFlorian Hahn     }
13909e516c5SFlorian Hahn   }
14009e516c5SFlorian Hahn 
14109e516c5SFlorian Hahn   if (Opcode == Instruction::Store)
14209e516c5SFlorian Hahn     if (!all_of(Operands, [](VPValue *Op) {
14309e516c5SFlorian Hahn           return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
14409e516c5SFlorian Hahn               ->isSimple();
14509e516c5SFlorian Hahn         })) {
14609e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
14709e516c5SFlorian Hahn       return false;
14809e516c5SFlorian Hahn     }
14909e516c5SFlorian Hahn 
15009e516c5SFlorian Hahn   return true;
15109e516c5SFlorian Hahn }
15209e516c5SFlorian Hahn 
getOperands(ArrayRef<VPValue * > Values,unsigned OperandIndex)15309e516c5SFlorian Hahn static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
15409e516c5SFlorian Hahn                                              unsigned OperandIndex) {
15509e516c5SFlorian Hahn   SmallVector<VPValue *, 4> Operands;
15609e516c5SFlorian Hahn   for (VPValue *V : Values) {
15731923f6bSFlorian Hahn     // Currently we only support VPInstructions.
15831923f6bSFlorian Hahn     auto *U = cast<VPInstruction>(V);
15909e516c5SFlorian Hahn     Operands.push_back(U->getOperand(OperandIndex));
16009e516c5SFlorian Hahn   }
16109e516c5SFlorian Hahn   return Operands;
16209e516c5SFlorian Hahn }
16309e516c5SFlorian Hahn 
areCommutative(ArrayRef<VPValue * > Values)16409e516c5SFlorian Hahn static bool areCommutative(ArrayRef<VPValue *> Values) {
16509e516c5SFlorian Hahn   return Instruction::isCommutative(
16609e516c5SFlorian Hahn       cast<VPInstruction>(Values[0])->getOpcode());
16709e516c5SFlorian Hahn }
16809e516c5SFlorian Hahn 
16909e516c5SFlorian Hahn static SmallVector<SmallVector<VPValue *, 4>, 4>
getOperands(ArrayRef<VPValue * > Values)17009e516c5SFlorian Hahn getOperands(ArrayRef<VPValue *> Values) {
17109e516c5SFlorian Hahn   SmallVector<SmallVector<VPValue *, 4>, 4> Result;
17209e516c5SFlorian Hahn   auto *VPI = cast<VPInstruction>(Values[0]);
17309e516c5SFlorian Hahn 
17409e516c5SFlorian Hahn   switch (VPI->getOpcode()) {
17509e516c5SFlorian Hahn   case Instruction::Load:
17609e516c5SFlorian Hahn     llvm_unreachable("Loads terminate a tree, no need to get operands");
17709e516c5SFlorian Hahn   case Instruction::Store:
17809e516c5SFlorian Hahn     Result.push_back(getOperands(Values, 0));
17909e516c5SFlorian Hahn     break;
18009e516c5SFlorian Hahn   default:
18109e516c5SFlorian Hahn     for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
18209e516c5SFlorian Hahn       Result.push_back(getOperands(Values, I));
18309e516c5SFlorian Hahn     break;
18409e516c5SFlorian Hahn   }
18509e516c5SFlorian Hahn 
18609e516c5SFlorian Hahn   return Result;
18709e516c5SFlorian Hahn }
18809e516c5SFlorian Hahn 
18909e516c5SFlorian Hahn /// Returns the opcode of Values or ~0 if they do not all agree.
getOpcode(ArrayRef<VPValue * > Values)19009e516c5SFlorian Hahn static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
19109e516c5SFlorian Hahn   unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
19209e516c5SFlorian Hahn   if (any_of(Values, [Opcode](VPValue *V) {
19309e516c5SFlorian Hahn         return cast<VPInstruction>(V)->getOpcode() != Opcode;
19409e516c5SFlorian Hahn       }))
19509e516c5SFlorian Hahn     return None;
19609e516c5SFlorian Hahn   return {Opcode};
19709e516c5SFlorian Hahn }
19809e516c5SFlorian Hahn 
19909e516c5SFlorian Hahn /// Returns true if A and B access sequential memory if they are loads or
20009e516c5SFlorian Hahn /// stores or if they have identical opcodes otherwise.
areConsecutiveOrMatch(VPInstruction * A,VPInstruction * B,VPInterleavedAccessInfo & IAI)20109e516c5SFlorian Hahn static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
20209e516c5SFlorian Hahn                                   VPInterleavedAccessInfo &IAI) {
20309e516c5SFlorian Hahn   if (A->getOpcode() != B->getOpcode())
20409e516c5SFlorian Hahn     return false;
20509e516c5SFlorian Hahn 
20609e516c5SFlorian Hahn   if (A->getOpcode() != Instruction::Load &&
20709e516c5SFlorian Hahn       A->getOpcode() != Instruction::Store)
20809e516c5SFlorian Hahn     return true;
20909e516c5SFlorian Hahn   auto *GA = IAI.getInterleaveGroup(A);
21009e516c5SFlorian Hahn   auto *GB = IAI.getInterleaveGroup(B);
21109e516c5SFlorian Hahn 
21209e516c5SFlorian Hahn   return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
21309e516c5SFlorian Hahn }
21409e516c5SFlorian Hahn 
21509e516c5SFlorian Hahn /// Implements getLAScore from Listing 7 in the paper.
21609e516c5SFlorian Hahn /// Traverses and compares operands of V1 and V2 to MaxLevel.
getLAScore(VPValue * V1,VPValue * V2,unsigned MaxLevel,VPInterleavedAccessInfo & IAI)21709e516c5SFlorian Hahn static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
21809e516c5SFlorian Hahn                            VPInterleavedAccessInfo &IAI) {
21931923f6bSFlorian Hahn   auto *I1 = dyn_cast<VPInstruction>(V1);
22031923f6bSFlorian Hahn   auto *I2 = dyn_cast<VPInstruction>(V2);
22131923f6bSFlorian Hahn   // Currently we only support VPInstructions.
22231923f6bSFlorian Hahn   if (!I1 || !I2)
22309e516c5SFlorian Hahn     return 0;
22409e516c5SFlorian Hahn 
22509e516c5SFlorian Hahn   if (MaxLevel == 0)
22631923f6bSFlorian Hahn     return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
22709e516c5SFlorian Hahn 
22809e516c5SFlorian Hahn   unsigned Score = 0;
22931923f6bSFlorian Hahn   for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
23031923f6bSFlorian Hahn     for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
23131923f6bSFlorian Hahn       Score +=
23231923f6bSFlorian Hahn           getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
23309e516c5SFlorian Hahn   return Score;
23409e516c5SFlorian Hahn }
23509e516c5SFlorian Hahn 
23609e516c5SFlorian Hahn std::pair<VPlanSlp::OpMode, VPValue *>
getBest(OpMode Mode,VPValue * Last,SmallPtrSetImpl<VPValue * > & Candidates,VPInterleavedAccessInfo & IAI)23709e516c5SFlorian Hahn VPlanSlp::getBest(OpMode Mode, VPValue *Last,
2386df11868SFlorian Hahn                   SmallPtrSetImpl<VPValue *> &Candidates,
23909e516c5SFlorian Hahn                   VPInterleavedAccessInfo &IAI) {
2406df11868SFlorian Hahn   assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
2416df11868SFlorian Hahn          "Currently we only handle load and commutative opcodes");
24209e516c5SFlorian Hahn   LLVM_DEBUG(dbgs() << "      getBest\n");
24309e516c5SFlorian Hahn 
2446df11868SFlorian Hahn   SmallVector<VPValue *, 4> BestCandidates;
24509e516c5SFlorian Hahn   LLVM_DEBUG(dbgs() << "        Candidates  for "
24609e516c5SFlorian Hahn                     << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
24709e516c5SFlorian Hahn   for (auto *Candidate : Candidates) {
24809e516c5SFlorian Hahn     auto *LastI = cast<VPInstruction>(Last);
24909e516c5SFlorian Hahn     auto *CandidateI = cast<VPInstruction>(Candidate);
25009e516c5SFlorian Hahn     if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
25109e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
25209e516c5SFlorian Hahn                         << " ");
25309e516c5SFlorian Hahn       BestCandidates.push_back(Candidate);
25409e516c5SFlorian Hahn     }
25509e516c5SFlorian Hahn   }
25609e516c5SFlorian Hahn   LLVM_DEBUG(dbgs() << "\n");
25709e516c5SFlorian Hahn 
25809e516c5SFlorian Hahn   if (BestCandidates.empty())
25909e516c5SFlorian Hahn     return {OpMode::Failed, nullptr};
26009e516c5SFlorian Hahn 
26109e516c5SFlorian Hahn   if (BestCandidates.size() == 1)
26209e516c5SFlorian Hahn     return {Mode, BestCandidates[0]};
26309e516c5SFlorian Hahn 
2646df11868SFlorian Hahn   VPValue *Best = nullptr;
26509e516c5SFlorian Hahn   unsigned BestScore = 0;
26609e516c5SFlorian Hahn   for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
26709e516c5SFlorian Hahn     unsigned PrevScore = ~0u;
26809e516c5SFlorian Hahn     bool AllSame = true;
26909e516c5SFlorian Hahn 
27009e516c5SFlorian Hahn     // FIXME: Avoid visiting the same operands multiple times.
27109e516c5SFlorian Hahn     for (auto *Candidate : BestCandidates) {
27209e516c5SFlorian Hahn       unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
27309e516c5SFlorian Hahn       if (PrevScore == ~0u)
27409e516c5SFlorian Hahn         PrevScore = Score;
27509e516c5SFlorian Hahn       if (PrevScore != Score)
27609e516c5SFlorian Hahn         AllSame = false;
27709e516c5SFlorian Hahn       PrevScore = Score;
27809e516c5SFlorian Hahn 
27909e516c5SFlorian Hahn       if (Score > BestScore) {
28009e516c5SFlorian Hahn         BestScore = Score;
28109e516c5SFlorian Hahn         Best = Candidate;
28209e516c5SFlorian Hahn       }
28309e516c5SFlorian Hahn     }
28409e516c5SFlorian Hahn     if (!AllSame)
28509e516c5SFlorian Hahn       break;
28609e516c5SFlorian Hahn   }
28709e516c5SFlorian Hahn   LLVM_DEBUG(dbgs() << "Found best "
28809e516c5SFlorian Hahn                     << *cast<VPInstruction>(Best)->getUnderlyingInstr()
28909e516c5SFlorian Hahn                     << "\n");
2906df11868SFlorian Hahn   Candidates.erase(Best);
29109e516c5SFlorian Hahn 
29209e516c5SFlorian Hahn   return {Mode, Best};
29309e516c5SFlorian Hahn }
29409e516c5SFlorian Hahn 
reorderMultiNodeOps()29509e516c5SFlorian Hahn SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
29609e516c5SFlorian Hahn   SmallVector<MultiNodeOpTy, 4> FinalOrder;
29709e516c5SFlorian Hahn   SmallVector<OpMode, 4> Mode;
29809e516c5SFlorian Hahn   FinalOrder.reserve(MultiNodeOps.size());
29909e516c5SFlorian Hahn   Mode.reserve(MultiNodeOps.size());
30009e516c5SFlorian Hahn 
30109e516c5SFlorian Hahn   LLVM_DEBUG(dbgs() << "Reordering multinode\n");
30209e516c5SFlorian Hahn 
30309e516c5SFlorian Hahn   for (auto &Operands : MultiNodeOps) {
30409e516c5SFlorian Hahn     FinalOrder.push_back({Operands.first, {Operands.second[0]}});
30509e516c5SFlorian Hahn     if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
30609e516c5SFlorian Hahn         Instruction::Load)
30709e516c5SFlorian Hahn       Mode.push_back(OpMode::Load);
30809e516c5SFlorian Hahn     else
30909e516c5SFlorian Hahn       Mode.push_back(OpMode::Opcode);
31009e516c5SFlorian Hahn   }
31109e516c5SFlorian Hahn 
31209e516c5SFlorian Hahn   for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
31309e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "  Finding best value for lane " << Lane << "\n");
3146df11868SFlorian Hahn     SmallPtrSet<VPValue *, 4> Candidates;
31509e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "  Candidates  ");
31609e516c5SFlorian Hahn     for (auto Ops : MultiNodeOps) {
31709e516c5SFlorian Hahn       LLVM_DEBUG(
31809e516c5SFlorian Hahn           dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
31909e516c5SFlorian Hahn                  << " ");
3206df11868SFlorian Hahn       Candidates.insert(Ops.second[Lane]);
32109e516c5SFlorian Hahn     }
32209e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "\n");
32309e516c5SFlorian Hahn 
32409e516c5SFlorian Hahn     for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
32509e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << "  Checking " << Op << "\n");
32609e516c5SFlorian Hahn       if (Mode[Op] == OpMode::Failed)
32709e516c5SFlorian Hahn         continue;
32809e516c5SFlorian Hahn 
32909e516c5SFlorian Hahn       VPValue *Last = FinalOrder[Op].second[Lane - 1];
33009e516c5SFlorian Hahn       std::pair<OpMode, VPValue *> Res =
33109e516c5SFlorian Hahn           getBest(Mode[Op], Last, Candidates, IAI);
33209e516c5SFlorian Hahn       if (Res.second)
33309e516c5SFlorian Hahn         FinalOrder[Op].second.push_back(Res.second);
33409e516c5SFlorian Hahn       else
33509e516c5SFlorian Hahn         // TODO: handle this case
33609e516c5SFlorian Hahn         FinalOrder[Op].second.push_back(markFailed());
33709e516c5SFlorian Hahn     }
33809e516c5SFlorian Hahn   }
33909e516c5SFlorian Hahn 
34009e516c5SFlorian Hahn   return FinalOrder;
34109e516c5SFlorian Hahn }
34209e516c5SFlorian Hahn 
34392205cb2SAndrei Elovikov #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dumpBundle(ArrayRef<VPValue * > Values)34409e516c5SFlorian Hahn void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
34502cb67deSFlorian Hahn   dbgs() << " Ops: ";
3461aaefbcaSSimon Pilgrim   for (auto Op : Values) {
3471aaefbcaSSimon Pilgrim     if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
3481aaefbcaSSimon Pilgrim       if (auto *Instr = VPInstr->getUnderlyingInstr()) {
34902cb67deSFlorian Hahn         dbgs() << *Instr << " | ";
3501aaefbcaSSimon Pilgrim         continue;
3511aaefbcaSSimon Pilgrim       }
35202cb67deSFlorian Hahn     dbgs() << " nullptr | ";
3531aaefbcaSSimon Pilgrim   }
35402cb67deSFlorian Hahn   dbgs() << "\n";
35509e516c5SFlorian Hahn }
35692205cb2SAndrei Elovikov #endif
35709e516c5SFlorian Hahn 
buildGraph(ArrayRef<VPValue * > Values)35809e516c5SFlorian Hahn VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
35909e516c5SFlorian Hahn   assert(!Values.empty() && "Need some operands!");
36009e516c5SFlorian Hahn 
36109e516c5SFlorian Hahn   // If we already visited this instruction bundle, re-use the existing node
36209e516c5SFlorian Hahn   auto I = BundleToCombined.find(to_vector<4>(Values));
36309e516c5SFlorian Hahn   if (I != BundleToCombined.end()) {
3642eca3728SFlorian Hahn #ifndef NDEBUG
36509e516c5SFlorian Hahn     // Check that the resulting graph is a tree. If we re-use a node, this means
36609e516c5SFlorian Hahn     // its values have multiple users. We only allow this, if all users of each
36709e516c5SFlorian Hahn     // value are the same instruction.
36809e516c5SFlorian Hahn     for (auto *V : Values) {
36909e516c5SFlorian Hahn       auto UI = V->user_begin();
37009e516c5SFlorian Hahn       auto *FirstUser = *UI++;
3712eca3728SFlorian Hahn       while (UI != V->user_end()) {
37209e516c5SFlorian Hahn         assert(*UI == FirstUser && "Currently we only support SLP trees.");
37309e516c5SFlorian Hahn         UI++;
37409e516c5SFlorian Hahn       }
37509e516c5SFlorian Hahn     }
37609e516c5SFlorian Hahn #endif
37709e516c5SFlorian Hahn     return I->second;
37809e516c5SFlorian Hahn   }
37909e516c5SFlorian Hahn 
38009e516c5SFlorian Hahn   // Dump inputs
38109e516c5SFlorian Hahn   LLVM_DEBUG({
38209e516c5SFlorian Hahn     dbgs() << "buildGraph: ";
38309e516c5SFlorian Hahn     dumpBundle(Values);
38409e516c5SFlorian Hahn   });
38509e516c5SFlorian Hahn 
38609e516c5SFlorian Hahn   if (!areVectorizable(Values))
38709e516c5SFlorian Hahn     return markFailed();
38809e516c5SFlorian Hahn 
38909e516c5SFlorian Hahn   assert(getOpcode(Values) && "Opcodes for all values must match");
390*7a47ee51SKazu Hirata   unsigned ValuesOpcode = *getOpcode(Values);
39109e516c5SFlorian Hahn 
39209e516c5SFlorian Hahn   SmallVector<VPValue *, 4> CombinedOperands;
39309e516c5SFlorian Hahn   if (areCommutative(Values)) {
39409e516c5SFlorian Hahn     bool MultiNodeRoot = !MultiNodeActive;
39509e516c5SFlorian Hahn     MultiNodeActive = true;
39609e516c5SFlorian Hahn     for (auto &Operands : getOperands(Values)) {
39709e516c5SFlorian Hahn       LLVM_DEBUG({
39809e516c5SFlorian Hahn         dbgs() << "  Visiting Commutative";
39909e516c5SFlorian Hahn         dumpBundle(Operands);
40009e516c5SFlorian Hahn       });
40109e516c5SFlorian Hahn 
40209e516c5SFlorian Hahn       auto OperandsOpcode = getOpcode(Operands);
40309e516c5SFlorian Hahn       if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
40409e516c5SFlorian Hahn         LLVM_DEBUG(dbgs() << "    Same opcode, continue building\n");
40509e516c5SFlorian Hahn         CombinedOperands.push_back(buildGraph(Operands));
40609e516c5SFlorian Hahn       } else {
40709e516c5SFlorian Hahn         LLVM_DEBUG(dbgs() << "    Adding multinode Ops\n");
40809e516c5SFlorian Hahn         // Create dummy VPInstruction, which will we replace later by the
40909e516c5SFlorian Hahn         // re-ordered operand.
41009e516c5SFlorian Hahn         VPInstruction *Op = new VPInstruction(0, {});
41109e516c5SFlorian Hahn         CombinedOperands.push_back(Op);
41209e516c5SFlorian Hahn         MultiNodeOps.emplace_back(Op, Operands);
41309e516c5SFlorian Hahn       }
41409e516c5SFlorian Hahn     }
41509e516c5SFlorian Hahn 
41609e516c5SFlorian Hahn     if (MultiNodeRoot) {
41709e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << "Reorder \n");
41809e516c5SFlorian Hahn       MultiNodeActive = false;
41909e516c5SFlorian Hahn 
42009e516c5SFlorian Hahn       auto FinalOrder = reorderMultiNodeOps();
42109e516c5SFlorian Hahn 
42209e516c5SFlorian Hahn       MultiNodeOps.clear();
42309e516c5SFlorian Hahn       for (auto &Ops : FinalOrder) {
42409e516c5SFlorian Hahn         VPInstruction *NewOp = buildGraph(Ops.second);
42509e516c5SFlorian Hahn         Ops.first->replaceAllUsesWith(NewOp);
42609e516c5SFlorian Hahn         for (unsigned i = 0; i < CombinedOperands.size(); i++)
42709e516c5SFlorian Hahn           if (CombinedOperands[i] == Ops.first)
42809e516c5SFlorian Hahn             CombinedOperands[i] = NewOp;
42909e516c5SFlorian Hahn         delete Ops.first;
43009e516c5SFlorian Hahn         Ops.first = NewOp;
43109e516c5SFlorian Hahn       }
43209e516c5SFlorian Hahn       LLVM_DEBUG(dbgs() << "Found final order\n");
43309e516c5SFlorian Hahn     }
43409e516c5SFlorian Hahn   } else {
43509e516c5SFlorian Hahn     LLVM_DEBUG(dbgs() << "  NonCommuntative\n");
43609e516c5SFlorian Hahn     if (ValuesOpcode == Instruction::Load)
43709e516c5SFlorian Hahn       for (VPValue *V : Values)
43809e516c5SFlorian Hahn         CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
43909e516c5SFlorian Hahn     else
44009e516c5SFlorian Hahn       for (auto &Operands : getOperands(Values))
44109e516c5SFlorian Hahn         CombinedOperands.push_back(buildGraph(Operands));
44209e516c5SFlorian Hahn   }
44309e516c5SFlorian Hahn 
44409e516c5SFlorian Hahn   unsigned Opcode;
44509e516c5SFlorian Hahn   switch (ValuesOpcode) {
44609e516c5SFlorian Hahn   case Instruction::Load:
44709e516c5SFlorian Hahn     Opcode = VPInstruction::SLPLoad;
44809e516c5SFlorian Hahn     break;
44909e516c5SFlorian Hahn   case Instruction::Store:
45009e516c5SFlorian Hahn     Opcode = VPInstruction::SLPStore;
45109e516c5SFlorian Hahn     break;
45209e516c5SFlorian Hahn   default:
45309e516c5SFlorian Hahn     Opcode = ValuesOpcode;
45409e516c5SFlorian Hahn     break;
45509e516c5SFlorian Hahn   }
45609e516c5SFlorian Hahn 
45709e516c5SFlorian Hahn   if (!CompletelySLP)
45809e516c5SFlorian Hahn     return markFailed();
45909e516c5SFlorian Hahn 
46009e516c5SFlorian Hahn   assert(CombinedOperands.size() > 0 && "Need more some operands");
4615b362e4cSFlorian Hahn   auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
4625b362e4cSFlorian Hahn   auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
4635b362e4cSFlorian Hahn   VPI->setUnderlyingInstr(Inst);
46409e516c5SFlorian Hahn 
465eb0371e4SFlorian Hahn   LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
466eb0371e4SFlorian Hahn                     << *cast<VPInstruction>(Values[0]) << "\n");
46709e516c5SFlorian Hahn   addCombined(Values, VPI);
46809e516c5SFlorian Hahn   return VPI;
46909e516c5SFlorian Hahn }
470