109e516c5SFlorian Hahn //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
209e516c5SFlorian Hahn //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609e516c5SFlorian Hahn //
709e516c5SFlorian Hahn //===----------------------------------------------------------------------===//
809e516c5SFlorian Hahn /// This file implements SLP analysis based on VPlan. The analysis is based on
909e516c5SFlorian Hahn /// the ideas described in
1009e516c5SFlorian Hahn ///
1109e516c5SFlorian Hahn /// Look-ahead SLP: auto-vectorization in the presence of commutative
1209e516c5SFlorian Hahn /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
1309e516c5SFlorian Hahn /// Luís F. W. Góes
1409e516c5SFlorian Hahn ///
1509e516c5SFlorian Hahn //===----------------------------------------------------------------------===//
1609e516c5SFlorian Hahn
1709e516c5SFlorian Hahn #include "VPlan.h"
181b89c832Sserge-sans-paille #include "VPlanValue.h"
191b89c832Sserge-sans-paille #include "llvm/ADT/DenseMap.h"
2009e516c5SFlorian Hahn #include "llvm/ADT/SmallVector.h"
2109e516c5SFlorian Hahn #include "llvm/Analysis/VectorUtils.h"
2209e516c5SFlorian Hahn #include "llvm/IR/Instruction.h"
2309e516c5SFlorian Hahn #include "llvm/IR/Instructions.h"
2409e516c5SFlorian Hahn #include "llvm/IR/Type.h"
2509e516c5SFlorian Hahn #include "llvm/IR/Value.h"
2609e516c5SFlorian Hahn #include "llvm/Support/Casting.h"
2709e516c5SFlorian Hahn #include "llvm/Support/Debug.h"
2809e516c5SFlorian Hahn #include "llvm/Support/ErrorHandling.h"
2909e516c5SFlorian Hahn #include "llvm/Support/raw_ostream.h"
30b013c58eSSimon Pilgrim #include <algorithm>
3109e516c5SFlorian Hahn #include <cassert>
32b013c58eSSimon Pilgrim #include <utility>
3309e516c5SFlorian Hahn
3409e516c5SFlorian Hahn using namespace llvm;
3509e516c5SFlorian Hahn
3609e516c5SFlorian Hahn #define DEBUG_TYPE "vplan-slp"
3709e516c5SFlorian Hahn
3809e516c5SFlorian Hahn // Number of levels to look ahead when re-ordering multi node operands.
3909e516c5SFlorian Hahn static unsigned LookaheadMaxDepth = 5;
4009e516c5SFlorian Hahn
markFailed()4109e516c5SFlorian Hahn VPInstruction *VPlanSlp::markFailed() {
4209e516c5SFlorian Hahn // FIXME: Currently this is used to signal we hit instructions we cannot
4309e516c5SFlorian Hahn // trivially SLP'ize.
4409e516c5SFlorian Hahn CompletelySLP = false;
4509e516c5SFlorian Hahn return nullptr;
4609e516c5SFlorian Hahn }
4709e516c5SFlorian Hahn
addCombined(ArrayRef<VPValue * > Operands,VPInstruction * New)4809e516c5SFlorian Hahn void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
4909e516c5SFlorian Hahn if (all_of(Operands, [](VPValue *V) {
5009e516c5SFlorian Hahn return cast<VPInstruction>(V)->getUnderlyingInstr();
5109e516c5SFlorian Hahn })) {
5209e516c5SFlorian Hahn unsigned BundleSize = 0;
5309e516c5SFlorian Hahn for (VPValue *V : Operands) {
5409e516c5SFlorian Hahn Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
5509e516c5SFlorian Hahn assert(!T->isVectorTy() && "Only scalar types supported for now");
5609e516c5SFlorian Hahn BundleSize += T->getScalarSizeInBits();
5709e516c5SFlorian Hahn }
5809e516c5SFlorian Hahn WidestBundleBits = std::max(WidestBundleBits, BundleSize);
5909e516c5SFlorian Hahn }
6009e516c5SFlorian Hahn
6109e516c5SFlorian Hahn auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
6209e516c5SFlorian Hahn assert(Res.second &&
6309e516c5SFlorian Hahn "Already created a combined instruction for the operand bundle");
6409e516c5SFlorian Hahn (void)Res;
6509e516c5SFlorian Hahn }
6609e516c5SFlorian Hahn
areVectorizable(ArrayRef<VPValue * > Operands) const6709e516c5SFlorian Hahn bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
6809e516c5SFlorian Hahn // Currently we only support VPInstructions.
6909e516c5SFlorian Hahn if (!all_of(Operands, [](VPValue *Op) {
7009e516c5SFlorian Hahn return Op && isa<VPInstruction>(Op) &&
7109e516c5SFlorian Hahn cast<VPInstruction>(Op)->getUnderlyingInstr();
7209e516c5SFlorian Hahn })) {
7309e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
7409e516c5SFlorian Hahn return false;
7509e516c5SFlorian Hahn }
7609e516c5SFlorian Hahn
7709e516c5SFlorian Hahn // Check if opcodes and type width agree for all instructions in the bundle.
7809e516c5SFlorian Hahn // FIXME: Differing widths/opcodes can be handled by inserting additional
7909e516c5SFlorian Hahn // instructions.
8009e516c5SFlorian Hahn // FIXME: Deal with non-primitive types.
8109e516c5SFlorian Hahn const Instruction *OriginalInstr =
8209e516c5SFlorian Hahn cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
8309e516c5SFlorian Hahn unsigned Opcode = OriginalInstr->getOpcode();
8409e516c5SFlorian Hahn unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
8509e516c5SFlorian Hahn if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
8609e516c5SFlorian Hahn const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
8709e516c5SFlorian Hahn return I->getOpcode() == Opcode &&
8809e516c5SFlorian Hahn I->getType()->getPrimitiveSizeInBits() == Width;
8909e516c5SFlorian Hahn })) {
9009e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
9109e516c5SFlorian Hahn return false;
9209e516c5SFlorian Hahn }
9309e516c5SFlorian Hahn
9409e516c5SFlorian Hahn // For now, all operands must be defined in the same BB.
9509e516c5SFlorian Hahn if (any_of(Operands, [this](VPValue *Op) {
9609e516c5SFlorian Hahn return cast<VPInstruction>(Op)->getParent() != &this->BB;
9709e516c5SFlorian Hahn })) {
9809e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
9909e516c5SFlorian Hahn return false;
10009e516c5SFlorian Hahn }
10109e516c5SFlorian Hahn
10209e516c5SFlorian Hahn if (any_of(Operands,
10309e516c5SFlorian Hahn [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
10409e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
10509e516c5SFlorian Hahn return false;
10609e516c5SFlorian Hahn }
10709e516c5SFlorian Hahn
10809e516c5SFlorian Hahn // For loads, check that there are no instructions writing to memory in
10909e516c5SFlorian Hahn // between them.
11009e516c5SFlorian Hahn // TODO: we only have to forbid instructions writing to memory that could
11109e516c5SFlorian Hahn // interfere with any of the loads in the bundle
11209e516c5SFlorian Hahn if (Opcode == Instruction::Load) {
11309e516c5SFlorian Hahn unsigned LoadsSeen = 0;
11409e516c5SFlorian Hahn VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
11509e516c5SFlorian Hahn for (auto &I : *Parent) {
116c11fd0dfSFlorian Hahn auto *VPI = dyn_cast<VPInstruction>(&I);
117c11fd0dfSFlorian Hahn if (!VPI)
118c11fd0dfSFlorian Hahn break;
11909e516c5SFlorian Hahn if (VPI->getOpcode() == Instruction::Load &&
120902cbcd5SKazu Hirata llvm::is_contained(Operands, VPI))
12109e516c5SFlorian Hahn LoadsSeen++;
12209e516c5SFlorian Hahn
12309e516c5SFlorian Hahn if (LoadsSeen == Operands.size())
12409e516c5SFlorian Hahn break;
12509e516c5SFlorian Hahn if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
12609e516c5SFlorian Hahn LLVM_DEBUG(
12709e516c5SFlorian Hahn dbgs() << "VPSLP: instruction modifying memory between loads\n");
12809e516c5SFlorian Hahn return false;
12909e516c5SFlorian Hahn }
13009e516c5SFlorian Hahn }
13109e516c5SFlorian Hahn
13209e516c5SFlorian Hahn if (!all_of(Operands, [](VPValue *Op) {
13309e516c5SFlorian Hahn return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
13409e516c5SFlorian Hahn ->isSimple();
13509e516c5SFlorian Hahn })) {
13609e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
13709e516c5SFlorian Hahn return false;
13809e516c5SFlorian Hahn }
13909e516c5SFlorian Hahn }
14009e516c5SFlorian Hahn
14109e516c5SFlorian Hahn if (Opcode == Instruction::Store)
14209e516c5SFlorian Hahn if (!all_of(Operands, [](VPValue *Op) {
14309e516c5SFlorian Hahn return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
14409e516c5SFlorian Hahn ->isSimple();
14509e516c5SFlorian Hahn })) {
14609e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
14709e516c5SFlorian Hahn return false;
14809e516c5SFlorian Hahn }
14909e516c5SFlorian Hahn
15009e516c5SFlorian Hahn return true;
15109e516c5SFlorian Hahn }
15209e516c5SFlorian Hahn
getOperands(ArrayRef<VPValue * > Values,unsigned OperandIndex)15309e516c5SFlorian Hahn static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
15409e516c5SFlorian Hahn unsigned OperandIndex) {
15509e516c5SFlorian Hahn SmallVector<VPValue *, 4> Operands;
15609e516c5SFlorian Hahn for (VPValue *V : Values) {
15731923f6bSFlorian Hahn // Currently we only support VPInstructions.
15831923f6bSFlorian Hahn auto *U = cast<VPInstruction>(V);
15909e516c5SFlorian Hahn Operands.push_back(U->getOperand(OperandIndex));
16009e516c5SFlorian Hahn }
16109e516c5SFlorian Hahn return Operands;
16209e516c5SFlorian Hahn }
16309e516c5SFlorian Hahn
areCommutative(ArrayRef<VPValue * > Values)16409e516c5SFlorian Hahn static bool areCommutative(ArrayRef<VPValue *> Values) {
16509e516c5SFlorian Hahn return Instruction::isCommutative(
16609e516c5SFlorian Hahn cast<VPInstruction>(Values[0])->getOpcode());
16709e516c5SFlorian Hahn }
16809e516c5SFlorian Hahn
16909e516c5SFlorian Hahn static SmallVector<SmallVector<VPValue *, 4>, 4>
getOperands(ArrayRef<VPValue * > Values)17009e516c5SFlorian Hahn getOperands(ArrayRef<VPValue *> Values) {
17109e516c5SFlorian Hahn SmallVector<SmallVector<VPValue *, 4>, 4> Result;
17209e516c5SFlorian Hahn auto *VPI = cast<VPInstruction>(Values[0]);
17309e516c5SFlorian Hahn
17409e516c5SFlorian Hahn switch (VPI->getOpcode()) {
17509e516c5SFlorian Hahn case Instruction::Load:
17609e516c5SFlorian Hahn llvm_unreachable("Loads terminate a tree, no need to get operands");
17709e516c5SFlorian Hahn case Instruction::Store:
17809e516c5SFlorian Hahn Result.push_back(getOperands(Values, 0));
17909e516c5SFlorian Hahn break;
18009e516c5SFlorian Hahn default:
18109e516c5SFlorian Hahn for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
18209e516c5SFlorian Hahn Result.push_back(getOperands(Values, I));
18309e516c5SFlorian Hahn break;
18409e516c5SFlorian Hahn }
18509e516c5SFlorian Hahn
18609e516c5SFlorian Hahn return Result;
18709e516c5SFlorian Hahn }
18809e516c5SFlorian Hahn
18909e516c5SFlorian Hahn /// Returns the opcode of Values or ~0 if they do not all agree.
getOpcode(ArrayRef<VPValue * > Values)19009e516c5SFlorian Hahn static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
19109e516c5SFlorian Hahn unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
19209e516c5SFlorian Hahn if (any_of(Values, [Opcode](VPValue *V) {
19309e516c5SFlorian Hahn return cast<VPInstruction>(V)->getOpcode() != Opcode;
19409e516c5SFlorian Hahn }))
19509e516c5SFlorian Hahn return None;
19609e516c5SFlorian Hahn return {Opcode};
19709e516c5SFlorian Hahn }
19809e516c5SFlorian Hahn
19909e516c5SFlorian Hahn /// Returns true if A and B access sequential memory if they are loads or
20009e516c5SFlorian Hahn /// stores or if they have identical opcodes otherwise.
areConsecutiveOrMatch(VPInstruction * A,VPInstruction * B,VPInterleavedAccessInfo & IAI)20109e516c5SFlorian Hahn static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
20209e516c5SFlorian Hahn VPInterleavedAccessInfo &IAI) {
20309e516c5SFlorian Hahn if (A->getOpcode() != B->getOpcode())
20409e516c5SFlorian Hahn return false;
20509e516c5SFlorian Hahn
20609e516c5SFlorian Hahn if (A->getOpcode() != Instruction::Load &&
20709e516c5SFlorian Hahn A->getOpcode() != Instruction::Store)
20809e516c5SFlorian Hahn return true;
20909e516c5SFlorian Hahn auto *GA = IAI.getInterleaveGroup(A);
21009e516c5SFlorian Hahn auto *GB = IAI.getInterleaveGroup(B);
21109e516c5SFlorian Hahn
21209e516c5SFlorian Hahn return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
21309e516c5SFlorian Hahn }
21409e516c5SFlorian Hahn
21509e516c5SFlorian Hahn /// Implements getLAScore from Listing 7 in the paper.
21609e516c5SFlorian Hahn /// Traverses and compares operands of V1 and V2 to MaxLevel.
getLAScore(VPValue * V1,VPValue * V2,unsigned MaxLevel,VPInterleavedAccessInfo & IAI)21709e516c5SFlorian Hahn static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
21809e516c5SFlorian Hahn VPInterleavedAccessInfo &IAI) {
21931923f6bSFlorian Hahn auto *I1 = dyn_cast<VPInstruction>(V1);
22031923f6bSFlorian Hahn auto *I2 = dyn_cast<VPInstruction>(V2);
22131923f6bSFlorian Hahn // Currently we only support VPInstructions.
22231923f6bSFlorian Hahn if (!I1 || !I2)
22309e516c5SFlorian Hahn return 0;
22409e516c5SFlorian Hahn
22509e516c5SFlorian Hahn if (MaxLevel == 0)
22631923f6bSFlorian Hahn return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
22709e516c5SFlorian Hahn
22809e516c5SFlorian Hahn unsigned Score = 0;
22931923f6bSFlorian Hahn for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
23031923f6bSFlorian Hahn for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
23131923f6bSFlorian Hahn Score +=
23231923f6bSFlorian Hahn getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
23309e516c5SFlorian Hahn return Score;
23409e516c5SFlorian Hahn }
23509e516c5SFlorian Hahn
23609e516c5SFlorian Hahn std::pair<VPlanSlp::OpMode, VPValue *>
getBest(OpMode Mode,VPValue * Last,SmallPtrSetImpl<VPValue * > & Candidates,VPInterleavedAccessInfo & IAI)23709e516c5SFlorian Hahn VPlanSlp::getBest(OpMode Mode, VPValue *Last,
2386df11868SFlorian Hahn SmallPtrSetImpl<VPValue *> &Candidates,
23909e516c5SFlorian Hahn VPInterleavedAccessInfo &IAI) {
2406df11868SFlorian Hahn assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
2416df11868SFlorian Hahn "Currently we only handle load and commutative opcodes");
24209e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " getBest\n");
24309e516c5SFlorian Hahn
2446df11868SFlorian Hahn SmallVector<VPValue *, 4> BestCandidates;
24509e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Candidates for "
24609e516c5SFlorian Hahn << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
24709e516c5SFlorian Hahn for (auto *Candidate : Candidates) {
24809e516c5SFlorian Hahn auto *LastI = cast<VPInstruction>(Last);
24909e516c5SFlorian Hahn auto *CandidateI = cast<VPInstruction>(Candidate);
25009e516c5SFlorian Hahn if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
25109e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
25209e516c5SFlorian Hahn << " ");
25309e516c5SFlorian Hahn BestCandidates.push_back(Candidate);
25409e516c5SFlorian Hahn }
25509e516c5SFlorian Hahn }
25609e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "\n");
25709e516c5SFlorian Hahn
25809e516c5SFlorian Hahn if (BestCandidates.empty())
25909e516c5SFlorian Hahn return {OpMode::Failed, nullptr};
26009e516c5SFlorian Hahn
26109e516c5SFlorian Hahn if (BestCandidates.size() == 1)
26209e516c5SFlorian Hahn return {Mode, BestCandidates[0]};
26309e516c5SFlorian Hahn
2646df11868SFlorian Hahn VPValue *Best = nullptr;
26509e516c5SFlorian Hahn unsigned BestScore = 0;
26609e516c5SFlorian Hahn for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
26709e516c5SFlorian Hahn unsigned PrevScore = ~0u;
26809e516c5SFlorian Hahn bool AllSame = true;
26909e516c5SFlorian Hahn
27009e516c5SFlorian Hahn // FIXME: Avoid visiting the same operands multiple times.
27109e516c5SFlorian Hahn for (auto *Candidate : BestCandidates) {
27209e516c5SFlorian Hahn unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
27309e516c5SFlorian Hahn if (PrevScore == ~0u)
27409e516c5SFlorian Hahn PrevScore = Score;
27509e516c5SFlorian Hahn if (PrevScore != Score)
27609e516c5SFlorian Hahn AllSame = false;
27709e516c5SFlorian Hahn PrevScore = Score;
27809e516c5SFlorian Hahn
27909e516c5SFlorian Hahn if (Score > BestScore) {
28009e516c5SFlorian Hahn BestScore = Score;
28109e516c5SFlorian Hahn Best = Candidate;
28209e516c5SFlorian Hahn }
28309e516c5SFlorian Hahn }
28409e516c5SFlorian Hahn if (!AllSame)
28509e516c5SFlorian Hahn break;
28609e516c5SFlorian Hahn }
28709e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "Found best "
28809e516c5SFlorian Hahn << *cast<VPInstruction>(Best)->getUnderlyingInstr()
28909e516c5SFlorian Hahn << "\n");
2906df11868SFlorian Hahn Candidates.erase(Best);
29109e516c5SFlorian Hahn
29209e516c5SFlorian Hahn return {Mode, Best};
29309e516c5SFlorian Hahn }
29409e516c5SFlorian Hahn
reorderMultiNodeOps()29509e516c5SFlorian Hahn SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
29609e516c5SFlorian Hahn SmallVector<MultiNodeOpTy, 4> FinalOrder;
29709e516c5SFlorian Hahn SmallVector<OpMode, 4> Mode;
29809e516c5SFlorian Hahn FinalOrder.reserve(MultiNodeOps.size());
29909e516c5SFlorian Hahn Mode.reserve(MultiNodeOps.size());
30009e516c5SFlorian Hahn
30109e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "Reordering multinode\n");
30209e516c5SFlorian Hahn
30309e516c5SFlorian Hahn for (auto &Operands : MultiNodeOps) {
30409e516c5SFlorian Hahn FinalOrder.push_back({Operands.first, {Operands.second[0]}});
30509e516c5SFlorian Hahn if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
30609e516c5SFlorian Hahn Instruction::Load)
30709e516c5SFlorian Hahn Mode.push_back(OpMode::Load);
30809e516c5SFlorian Hahn else
30909e516c5SFlorian Hahn Mode.push_back(OpMode::Opcode);
31009e516c5SFlorian Hahn }
31109e516c5SFlorian Hahn
31209e516c5SFlorian Hahn for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
31309e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
3146df11868SFlorian Hahn SmallPtrSet<VPValue *, 4> Candidates;
31509e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Candidates ");
31609e516c5SFlorian Hahn for (auto Ops : MultiNodeOps) {
31709e516c5SFlorian Hahn LLVM_DEBUG(
31809e516c5SFlorian Hahn dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
31909e516c5SFlorian Hahn << " ");
3206df11868SFlorian Hahn Candidates.insert(Ops.second[Lane]);
32109e516c5SFlorian Hahn }
32209e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "\n");
32309e516c5SFlorian Hahn
32409e516c5SFlorian Hahn for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
32509e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
32609e516c5SFlorian Hahn if (Mode[Op] == OpMode::Failed)
32709e516c5SFlorian Hahn continue;
32809e516c5SFlorian Hahn
32909e516c5SFlorian Hahn VPValue *Last = FinalOrder[Op].second[Lane - 1];
33009e516c5SFlorian Hahn std::pair<OpMode, VPValue *> Res =
33109e516c5SFlorian Hahn getBest(Mode[Op], Last, Candidates, IAI);
33209e516c5SFlorian Hahn if (Res.second)
33309e516c5SFlorian Hahn FinalOrder[Op].second.push_back(Res.second);
33409e516c5SFlorian Hahn else
33509e516c5SFlorian Hahn // TODO: handle this case
33609e516c5SFlorian Hahn FinalOrder[Op].second.push_back(markFailed());
33709e516c5SFlorian Hahn }
33809e516c5SFlorian Hahn }
33909e516c5SFlorian Hahn
34009e516c5SFlorian Hahn return FinalOrder;
34109e516c5SFlorian Hahn }
34209e516c5SFlorian Hahn
34392205cb2SAndrei Elovikov #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dumpBundle(ArrayRef<VPValue * > Values)34409e516c5SFlorian Hahn void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
34502cb67deSFlorian Hahn dbgs() << " Ops: ";
3461aaefbcaSSimon Pilgrim for (auto Op : Values) {
3471aaefbcaSSimon Pilgrim if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
3481aaefbcaSSimon Pilgrim if (auto *Instr = VPInstr->getUnderlyingInstr()) {
34902cb67deSFlorian Hahn dbgs() << *Instr << " | ";
3501aaefbcaSSimon Pilgrim continue;
3511aaefbcaSSimon Pilgrim }
35202cb67deSFlorian Hahn dbgs() << " nullptr | ";
3531aaefbcaSSimon Pilgrim }
35402cb67deSFlorian Hahn dbgs() << "\n";
35509e516c5SFlorian Hahn }
35692205cb2SAndrei Elovikov #endif
35709e516c5SFlorian Hahn
buildGraph(ArrayRef<VPValue * > Values)35809e516c5SFlorian Hahn VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
35909e516c5SFlorian Hahn assert(!Values.empty() && "Need some operands!");
36009e516c5SFlorian Hahn
36109e516c5SFlorian Hahn // If we already visited this instruction bundle, re-use the existing node
36209e516c5SFlorian Hahn auto I = BundleToCombined.find(to_vector<4>(Values));
36309e516c5SFlorian Hahn if (I != BundleToCombined.end()) {
3642eca3728SFlorian Hahn #ifndef NDEBUG
36509e516c5SFlorian Hahn // Check that the resulting graph is a tree. If we re-use a node, this means
36609e516c5SFlorian Hahn // its values have multiple users. We only allow this, if all users of each
36709e516c5SFlorian Hahn // value are the same instruction.
36809e516c5SFlorian Hahn for (auto *V : Values) {
36909e516c5SFlorian Hahn auto UI = V->user_begin();
37009e516c5SFlorian Hahn auto *FirstUser = *UI++;
3712eca3728SFlorian Hahn while (UI != V->user_end()) {
37209e516c5SFlorian Hahn assert(*UI == FirstUser && "Currently we only support SLP trees.");
37309e516c5SFlorian Hahn UI++;
37409e516c5SFlorian Hahn }
37509e516c5SFlorian Hahn }
37609e516c5SFlorian Hahn #endif
37709e516c5SFlorian Hahn return I->second;
37809e516c5SFlorian Hahn }
37909e516c5SFlorian Hahn
38009e516c5SFlorian Hahn // Dump inputs
38109e516c5SFlorian Hahn LLVM_DEBUG({
38209e516c5SFlorian Hahn dbgs() << "buildGraph: ";
38309e516c5SFlorian Hahn dumpBundle(Values);
38409e516c5SFlorian Hahn });
38509e516c5SFlorian Hahn
38609e516c5SFlorian Hahn if (!areVectorizable(Values))
38709e516c5SFlorian Hahn return markFailed();
38809e516c5SFlorian Hahn
38909e516c5SFlorian Hahn assert(getOpcode(Values) && "Opcodes for all values must match");
390*7a47ee51SKazu Hirata unsigned ValuesOpcode = *getOpcode(Values);
39109e516c5SFlorian Hahn
39209e516c5SFlorian Hahn SmallVector<VPValue *, 4> CombinedOperands;
39309e516c5SFlorian Hahn if (areCommutative(Values)) {
39409e516c5SFlorian Hahn bool MultiNodeRoot = !MultiNodeActive;
39509e516c5SFlorian Hahn MultiNodeActive = true;
39609e516c5SFlorian Hahn for (auto &Operands : getOperands(Values)) {
39709e516c5SFlorian Hahn LLVM_DEBUG({
39809e516c5SFlorian Hahn dbgs() << " Visiting Commutative";
39909e516c5SFlorian Hahn dumpBundle(Operands);
40009e516c5SFlorian Hahn });
40109e516c5SFlorian Hahn
40209e516c5SFlorian Hahn auto OperandsOpcode = getOpcode(Operands);
40309e516c5SFlorian Hahn if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
40409e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
40509e516c5SFlorian Hahn CombinedOperands.push_back(buildGraph(Operands));
40609e516c5SFlorian Hahn } else {
40709e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
40809e516c5SFlorian Hahn // Create dummy VPInstruction, which will we replace later by the
40909e516c5SFlorian Hahn // re-ordered operand.
41009e516c5SFlorian Hahn VPInstruction *Op = new VPInstruction(0, {});
41109e516c5SFlorian Hahn CombinedOperands.push_back(Op);
41209e516c5SFlorian Hahn MultiNodeOps.emplace_back(Op, Operands);
41309e516c5SFlorian Hahn }
41409e516c5SFlorian Hahn }
41509e516c5SFlorian Hahn
41609e516c5SFlorian Hahn if (MultiNodeRoot) {
41709e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "Reorder \n");
41809e516c5SFlorian Hahn MultiNodeActive = false;
41909e516c5SFlorian Hahn
42009e516c5SFlorian Hahn auto FinalOrder = reorderMultiNodeOps();
42109e516c5SFlorian Hahn
42209e516c5SFlorian Hahn MultiNodeOps.clear();
42309e516c5SFlorian Hahn for (auto &Ops : FinalOrder) {
42409e516c5SFlorian Hahn VPInstruction *NewOp = buildGraph(Ops.second);
42509e516c5SFlorian Hahn Ops.first->replaceAllUsesWith(NewOp);
42609e516c5SFlorian Hahn for (unsigned i = 0; i < CombinedOperands.size(); i++)
42709e516c5SFlorian Hahn if (CombinedOperands[i] == Ops.first)
42809e516c5SFlorian Hahn CombinedOperands[i] = NewOp;
42909e516c5SFlorian Hahn delete Ops.first;
43009e516c5SFlorian Hahn Ops.first = NewOp;
43109e516c5SFlorian Hahn }
43209e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << "Found final order\n");
43309e516c5SFlorian Hahn }
43409e516c5SFlorian Hahn } else {
43509e516c5SFlorian Hahn LLVM_DEBUG(dbgs() << " NonCommuntative\n");
43609e516c5SFlorian Hahn if (ValuesOpcode == Instruction::Load)
43709e516c5SFlorian Hahn for (VPValue *V : Values)
43809e516c5SFlorian Hahn CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
43909e516c5SFlorian Hahn else
44009e516c5SFlorian Hahn for (auto &Operands : getOperands(Values))
44109e516c5SFlorian Hahn CombinedOperands.push_back(buildGraph(Operands));
44209e516c5SFlorian Hahn }
44309e516c5SFlorian Hahn
44409e516c5SFlorian Hahn unsigned Opcode;
44509e516c5SFlorian Hahn switch (ValuesOpcode) {
44609e516c5SFlorian Hahn case Instruction::Load:
44709e516c5SFlorian Hahn Opcode = VPInstruction::SLPLoad;
44809e516c5SFlorian Hahn break;
44909e516c5SFlorian Hahn case Instruction::Store:
45009e516c5SFlorian Hahn Opcode = VPInstruction::SLPStore;
45109e516c5SFlorian Hahn break;
45209e516c5SFlorian Hahn default:
45309e516c5SFlorian Hahn Opcode = ValuesOpcode;
45409e516c5SFlorian Hahn break;
45509e516c5SFlorian Hahn }
45609e516c5SFlorian Hahn
45709e516c5SFlorian Hahn if (!CompletelySLP)
45809e516c5SFlorian Hahn return markFailed();
45909e516c5SFlorian Hahn
46009e516c5SFlorian Hahn assert(CombinedOperands.size() > 0 && "Need more some operands");
4615b362e4cSFlorian Hahn auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
4625b362e4cSFlorian Hahn auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
4635b362e4cSFlorian Hahn VPI->setUnderlyingInstr(Inst);
46409e516c5SFlorian Hahn
465eb0371e4SFlorian Hahn LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
466eb0371e4SFlorian Hahn << *cast<VPInstruction>(Values[0]) << "\n");
46709e516c5SFlorian Hahn addCombined(Values, VPI);
46809e516c5SFlorian Hahn return VPI;
46909e516c5SFlorian Hahn }
470