1f1933329SEugene Zelenko //===- InterleavedAccessPass.cpp ------------------------------------------===//
21c1e0c9eSHao Liu //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61c1e0c9eSHao Liu //
71c1e0c9eSHao Liu //===----------------------------------------------------------------------===//
81c1e0c9eSHao Liu //
91c1e0c9eSHao Liu // This file implements the Interleaved Access pass, which identifies
10a306eeb2SChad Rosier // interleaved memory accesses and transforms them into target specific
11a306eeb2SChad Rosier // intrinsics.
121c1e0c9eSHao Liu //
131c1e0c9eSHao Liu // An interleaved load reads data from memory into several vectors, with
141c1e0c9eSHao Liu // DE-interleaving the data on a factor. An interleaved store writes several
151c1e0c9eSHao Liu // vectors to memory with RE-interleaving the data on a factor.
161c1e0c9eSHao Liu //
17a306eeb2SChad Rosier // As interleaved accesses are difficult to identified in CodeGen (mainly
18a306eeb2SChad Rosier // because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
19a306eeb2SChad Rosier // IR), we identify and transform them to intrinsics in this pass so the
20a306eeb2SChad Rosier // intrinsics can be easily matched into target specific instructions later in
21a306eeb2SChad Rosier // CodeGen.
221c1e0c9eSHao Liu //
231c1e0c9eSHao Liu // E.g. An interleaved load (Factor = 2):
241c1e0c9eSHao Liu // %wide.vec = load <8 x i32>, <8 x i32>* %ptr
259f2d9364SJuneyoung Lee // %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
269f2d9364SJuneyoung Lee // %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
271c1e0c9eSHao Liu //
281c1e0c9eSHao Liu // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
291c1e0c9eSHao Liu // intrinsic in ARM backend.
301c1e0c9eSHao Liu //
3101a057a0SDavid L Kreitzer // In X86, this can be further optimized into a set of target
3201a057a0SDavid L Kreitzer // specific loads followed by an optimized sequence of shuffles.
3301a057a0SDavid L Kreitzer //
341c1e0c9eSHao Liu // E.g. An interleaved store (Factor = 3):
351c1e0c9eSHao Liu // %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
361c1e0c9eSHao Liu // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
371c1e0c9eSHao Liu // store <12 x i32> %i.vec, <12 x i32>* %ptr
381c1e0c9eSHao Liu //
391c1e0c9eSHao Liu // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
401c1e0c9eSHao Liu // intrinsic in ARM backend.
411c1e0c9eSHao Liu //
4201a057a0SDavid L Kreitzer // Similarly, a set of interleaved stores can be transformed into an optimized
4301a057a0SDavid L Kreitzer // sequence of shuffles followed by a set of target specific stores for X86.
44f1933329SEugene Zelenko //
451c1e0c9eSHao Liu //===----------------------------------------------------------------------===//
461c1e0c9eSHao Liu
47f1933329SEugene Zelenko #include "llvm/ADT/ArrayRef.h"
48f1933329SEugene Zelenko #include "llvm/ADT/DenseMap.h"
49ed98c1b3Sserge-sans-paille #include "llvm/ADT/SetVector.h"
50f1933329SEugene Zelenko #include "llvm/ADT/SmallVector.h"
51b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetLowering.h"
528b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h"
53b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetSubtargetInfo.h"
54f1933329SEugene Zelenko #include "llvm/IR/Constants.h"
55476c0afcSMatthew Simpson #include "llvm/IR/Dominators.h"
56f1933329SEugene Zelenko #include "llvm/IR/Function.h"
57f1933329SEugene Zelenko #include "llvm/IR/IRBuilder.h"
581c1e0c9eSHao Liu #include "llvm/IR/InstIterator.h"
59f1933329SEugene Zelenko #include "llvm/IR/Instruction.h"
60f1933329SEugene Zelenko #include "llvm/IR/Instructions.h"
6105da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
62f1933329SEugene Zelenko #include "llvm/Pass.h"
63f1933329SEugene Zelenko #include "llvm/Support/Casting.h"
64f1933329SEugene Zelenko #include "llvm/Support/CommandLine.h"
651c1e0c9eSHao Liu #include "llvm/Support/Debug.h"
661c1e0c9eSHao Liu #include "llvm/Support/MathExtras.h"
67b41c0b44SHao Liu #include "llvm/Support/raw_ostream.h"
68f1933329SEugene Zelenko #include "llvm/Target/TargetMachine.h"
69a4b6b1e1SDavid Green #include "llvm/Transforms/Utils/Local.h"
70f1933329SEugene Zelenko #include <cassert>
71f1933329SEugene Zelenko #include <utility>
721c1e0c9eSHao Liu
731c1e0c9eSHao Liu using namespace llvm;
741c1e0c9eSHao Liu
751c1e0c9eSHao Liu #define DEBUG_TYPE "interleaved-access"
761c1e0c9eSHao Liu
771c1e0c9eSHao Liu static cl::opt<bool> LowerInterleavedAccesses(
781c1e0c9eSHao Liu "lower-interleaved-accesses",
791c1e0c9eSHao Liu cl::desc("Enable lowering interleaved accesses to intrinsics"),
806d3f05c0SSilviu Baranga cl::init(true), cl::Hidden);
811c1e0c9eSHao Liu
821c1e0c9eSHao Liu namespace {
831c1e0c9eSHao Liu
841c1e0c9eSHao Liu class InterleavedAccess : public FunctionPass {
851c1e0c9eSHao Liu public:
861c1e0c9eSHao Liu static char ID;
87f1933329SEugene Zelenko
InterleavedAccess()88f1933329SEugene Zelenko InterleavedAccess() : FunctionPass(ID) {
891c1e0c9eSHao Liu initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
901c1e0c9eSHao Liu }
911c1e0c9eSHao Liu
getPassName() const92117296c0SMehdi Amini StringRef getPassName() const override { return "Interleaved Access Pass"; }
931c1e0c9eSHao Liu
941c1e0c9eSHao Liu bool runOnFunction(Function &F) override;
951c1e0c9eSHao Liu
getAnalysisUsage(AnalysisUsage & AU) const96476c0afcSMatthew Simpson void getAnalysisUsage(AnalysisUsage &AU) const override {
97476c0afcSMatthew Simpson AU.addRequired<DominatorTreeWrapperPass>();
98c49611f9SDavid Green AU.setPreservesCFG();
99476c0afcSMatthew Simpson }
100476c0afcSMatthew Simpson
1011c1e0c9eSHao Liu private:
102f1933329SEugene Zelenko DominatorTree *DT = nullptr;
103f1933329SEugene Zelenko const TargetLowering *TLI = nullptr;
1041c1e0c9eSHao Liu
1051e425c9fSBenjamin Kramer /// The maximum supported interleave factor.
1061e425c9fSBenjamin Kramer unsigned MaxFactor;
1071e425c9fSBenjamin Kramer
1085f8f34e4SAdrian Prantl /// Transform an interleaved load into target specific intrinsics.
1091c1e0c9eSHao Liu bool lowerInterleavedLoad(LoadInst *LI,
1101c1e0c9eSHao Liu SmallVector<Instruction *, 32> &DeadInsts);
1111c1e0c9eSHao Liu
1125f8f34e4SAdrian Prantl /// Transform an interleaved store into target specific intrinsics.
1131c1e0c9eSHao Liu bool lowerInterleavedStore(StoreInst *SI,
1141c1e0c9eSHao Liu SmallVector<Instruction *, 32> &DeadInsts);
115476c0afcSMatthew Simpson
1165f8f34e4SAdrian Prantl /// Returns true if the uses of an interleaved load by the
117476c0afcSMatthew Simpson /// extractelement instructions in \p Extracts can be replaced by uses of the
118476c0afcSMatthew Simpson /// shufflevector instructions in \p Shuffles instead. If so, the necessary
119476c0afcSMatthew Simpson /// replacements are also performed.
120476c0afcSMatthew Simpson bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
121476c0afcSMatthew Simpson ArrayRef<ShuffleVectorInst *> Shuffles);
122a4b6b1e1SDavid Green
123a4b6b1e1SDavid Green /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
124a4b6b1e1SDavid Green /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
125a4b6b1e1SDavid Green /// interleaving load. Any newly created shuffles that operate on \p LI will
126ed936aadSFlorian Hahn /// be added to \p Shuffles. Returns true, if any changes to the IR have been
127ed936aadSFlorian Hahn /// made.
128ed936aadSFlorian Hahn bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
129a4b6b1e1SDavid Green SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
130a4b6b1e1SDavid Green LoadInst *LI);
1311c1e0c9eSHao Liu };
132f1933329SEugene Zelenko
1331c1e0c9eSHao Liu } // end anonymous namespace.
1341c1e0c9eSHao Liu
1351c1e0c9eSHao Liu char InterleavedAccess::ID = 0;
136f1933329SEugene Zelenko
1371527baabSMatthias Braun INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
138476c0afcSMatthew Simpson "Lower interleaved memory accesses to target specific intrinsics", false,
139476c0afcSMatthew Simpson false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)140476c0afcSMatthew Simpson INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1411527baabSMatthias Braun INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
142476c0afcSMatthew Simpson "Lower interleaved memory accesses to target specific intrinsics", false,
143476c0afcSMatthew Simpson false)
1441c1e0c9eSHao Liu
1458b61764cSFrancis Visoiu Mistrih FunctionPass *llvm::createInterleavedAccessPass() {
1468b61764cSFrancis Visoiu Mistrih return new InterleavedAccess();
1471c1e0c9eSHao Liu }
1481c1e0c9eSHao Liu
1495f8f34e4SAdrian Prantl /// Check if the mask is a DE-interleave mask of the given factor
1501c1e0c9eSHao Liu /// \p Factor like:
1511c1e0c9eSHao Liu /// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
isDeInterleaveMaskOfFactor(ArrayRef<int> Mask,unsigned Factor,unsigned & Index)1521c1e0c9eSHao Liu static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
1531c1e0c9eSHao Liu unsigned &Index) {
1541c1e0c9eSHao Liu // Check all potential start indices from 0 to (Factor - 1).
1551c1e0c9eSHao Liu for (Index = 0; Index < Factor; Index++) {
1561c1e0c9eSHao Liu unsigned i = 0;
1571c1e0c9eSHao Liu
1581c1e0c9eSHao Liu // Check that elements are in ascending order by Factor. Ignore undef
1591c1e0c9eSHao Liu // elements.
1601c1e0c9eSHao Liu for (; i < Mask.size(); i++)
1611c1e0c9eSHao Liu if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
1621c1e0c9eSHao Liu break;
1631c1e0c9eSHao Liu
1641c1e0c9eSHao Liu if (i == Mask.size())
1651c1e0c9eSHao Liu return true;
1661c1e0c9eSHao Liu }
1671c1e0c9eSHao Liu
1681c1e0c9eSHao Liu return false;
1691c1e0c9eSHao Liu }
1701c1e0c9eSHao Liu
1715f8f34e4SAdrian Prantl /// Check if the mask is a DE-interleave mask for an interleaved load.
1721c1e0c9eSHao Liu ///
1731c1e0c9eSHao Liu /// E.g. DE-interleave masks (Factor = 2) could be:
1741c1e0c9eSHao Liu /// <0, 2, 4, 6> (mask of index 0 to extract even elements)
1751c1e0c9eSHao Liu /// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
isDeInterleaveMask(ArrayRef<int> Mask,unsigned & Factor,unsigned & Index,unsigned MaxFactor,unsigned NumLoadElements)1761c1e0c9eSHao Liu static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
17796f295e2SEli Friedman unsigned &Index, unsigned MaxFactor,
17896f295e2SEli Friedman unsigned NumLoadElements) {
1791c1e0c9eSHao Liu if (Mask.size() < 2)
1801c1e0c9eSHao Liu return false;
1811c1e0c9eSHao Liu
1821c1e0c9eSHao Liu // Check potential Factors.
18396f295e2SEli Friedman for (Factor = 2; Factor <= MaxFactor; Factor++) {
18496f295e2SEli Friedman // Make sure we don't produce a load wider than the input load.
18596f295e2SEli Friedman if (Mask.size() * Factor > NumLoadElements)
18696f295e2SEli Friedman return false;
1871c1e0c9eSHao Liu if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
1881c1e0c9eSHao Liu return true;
18996f295e2SEli Friedman }
1901c1e0c9eSHao Liu
1911c1e0c9eSHao Liu return false;
1921c1e0c9eSHao Liu }
1931c1e0c9eSHao Liu
1945f8f34e4SAdrian Prantl /// Check if the mask can be used in an interleaved store.
19577c5eaaeSAlina Sbirlea //
19677c5eaaeSAlina Sbirlea /// It checks for a more general pattern than the RE-interleave mask.
19777c5eaaeSAlina Sbirlea /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
19877c5eaaeSAlina Sbirlea /// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
19977c5eaaeSAlina Sbirlea /// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
20077c5eaaeSAlina Sbirlea /// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
2011c1e0c9eSHao Liu ///
20277c5eaaeSAlina Sbirlea /// The particular case of an RE-interleave mask is:
20377c5eaaeSAlina Sbirlea /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
20477c5eaaeSAlina Sbirlea /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
isReInterleaveMask(ArrayRef<int> Mask,unsigned & Factor,unsigned MaxFactor,unsigned OpNumElts)2051e425c9fSBenjamin Kramer static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
20601fa9622SMatthias Braun unsigned MaxFactor, unsigned OpNumElts) {
2071c1e0c9eSHao Liu unsigned NumElts = Mask.size();
2081c1e0c9eSHao Liu if (NumElts < 4)
2091c1e0c9eSHao Liu return false;
2101c1e0c9eSHao Liu
2111c1e0c9eSHao Liu // Check potential Factors.
2121c1e0c9eSHao Liu for (Factor = 2; Factor <= MaxFactor; Factor++) {
2131c1e0c9eSHao Liu if (NumElts % Factor)
2141c1e0c9eSHao Liu continue;
2151c1e0c9eSHao Liu
21677c5eaaeSAlina Sbirlea unsigned LaneLen = NumElts / Factor;
21777c5eaaeSAlina Sbirlea if (!isPowerOf2_32(LaneLen))
2181c1e0c9eSHao Liu continue;
2191c1e0c9eSHao Liu
22077c5eaaeSAlina Sbirlea // Check whether each element matches the general interleaved rule.
22177c5eaaeSAlina Sbirlea // Ignore undef elements, as long as the defined elements match the rule.
22277c5eaaeSAlina Sbirlea // Outer loop processes all factors (x, y, z in the above example)
22377c5eaaeSAlina Sbirlea unsigned I = 0, J;
22477c5eaaeSAlina Sbirlea for (; I < Factor; I++) {
22577c5eaaeSAlina Sbirlea unsigned SavedLaneValue;
22677c5eaaeSAlina Sbirlea unsigned SavedNoUndefs = 0;
22777c5eaaeSAlina Sbirlea
22877c5eaaeSAlina Sbirlea // Inner loop processes consecutive accesses (x, x+1... in the example)
22977c5eaaeSAlina Sbirlea for (J = 0; J < LaneLen - 1; J++) {
23077c5eaaeSAlina Sbirlea // Lane computes x's position in the Mask
23177c5eaaeSAlina Sbirlea unsigned Lane = J * Factor + I;
23277c5eaaeSAlina Sbirlea unsigned NextLane = Lane + Factor;
23377c5eaaeSAlina Sbirlea int LaneValue = Mask[Lane];
23477c5eaaeSAlina Sbirlea int NextLaneValue = Mask[NextLane];
23577c5eaaeSAlina Sbirlea
23677c5eaaeSAlina Sbirlea // If both are defined, values must be sequential
23777c5eaaeSAlina Sbirlea if (LaneValue >= 0 && NextLaneValue >= 0 &&
23877c5eaaeSAlina Sbirlea LaneValue + 1 != NextLaneValue)
2391c1e0c9eSHao Liu break;
2401c1e0c9eSHao Liu
24177c5eaaeSAlina Sbirlea // If the next value is undef, save the current one as reference
24277c5eaaeSAlina Sbirlea if (LaneValue >= 0 && NextLaneValue < 0) {
24377c5eaaeSAlina Sbirlea SavedLaneValue = LaneValue;
24477c5eaaeSAlina Sbirlea SavedNoUndefs = 1;
24577c5eaaeSAlina Sbirlea }
24677c5eaaeSAlina Sbirlea
24777c5eaaeSAlina Sbirlea // Undefs are allowed, but defined elements must still be consecutive:
24877c5eaaeSAlina Sbirlea // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
24977c5eaaeSAlina Sbirlea // Verify this by storing the last non-undef followed by an undef
25077c5eaaeSAlina Sbirlea // Check that following non-undef masks are incremented with the
25177c5eaaeSAlina Sbirlea // corresponding distance.
25277c5eaaeSAlina Sbirlea if (SavedNoUndefs > 0 && LaneValue < 0) {
25377c5eaaeSAlina Sbirlea SavedNoUndefs++;
25477c5eaaeSAlina Sbirlea if (NextLaneValue >= 0 &&
25577c5eaaeSAlina Sbirlea SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
25677c5eaaeSAlina Sbirlea break;
25777c5eaaeSAlina Sbirlea }
25877c5eaaeSAlina Sbirlea }
25977c5eaaeSAlina Sbirlea
26077c5eaaeSAlina Sbirlea if (J < LaneLen - 1)
26177c5eaaeSAlina Sbirlea break;
26277c5eaaeSAlina Sbirlea
26377c5eaaeSAlina Sbirlea int StartMask = 0;
26477c5eaaeSAlina Sbirlea if (Mask[I] >= 0) {
26577c5eaaeSAlina Sbirlea // Check that the start of the I range (J=0) is greater than 0
26677c5eaaeSAlina Sbirlea StartMask = Mask[I];
26777c5eaaeSAlina Sbirlea } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
26877c5eaaeSAlina Sbirlea // StartMask defined by the last value in lane
26977c5eaaeSAlina Sbirlea StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
27077c5eaaeSAlina Sbirlea } else if (SavedNoUndefs > 0) {
27177c5eaaeSAlina Sbirlea // StartMask defined by some non-zero value in the j loop
27277c5eaaeSAlina Sbirlea StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
27377c5eaaeSAlina Sbirlea }
27477c5eaaeSAlina Sbirlea // else StartMask remains set to 0, i.e. all elements are undefs
27577c5eaaeSAlina Sbirlea
27677c5eaaeSAlina Sbirlea if (StartMask < 0)
27777c5eaaeSAlina Sbirlea break;
27801fa9622SMatthias Braun // We must stay within the vectors; This case can happen with undefs.
27901fa9622SMatthias Braun if (StartMask + LaneLen > OpNumElts*2)
28001fa9622SMatthias Braun break;
28177c5eaaeSAlina Sbirlea }
28277c5eaaeSAlina Sbirlea
28377c5eaaeSAlina Sbirlea // Found an interleaved mask of current factor.
28477c5eaaeSAlina Sbirlea if (I == Factor)
2851c1e0c9eSHao Liu return true;
2861c1e0c9eSHao Liu }
2871c1e0c9eSHao Liu
2881c1e0c9eSHao Liu return false;
2891c1e0c9eSHao Liu }
2901c1e0c9eSHao Liu
lowerInterleavedLoad(LoadInst * LI,SmallVector<Instruction *,32> & DeadInsts)2911c1e0c9eSHao Liu bool InterleavedAccess::lowerInterleavedLoad(
2921c1e0c9eSHao Liu LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
293ff5b9a7bSChristopher Tetreault if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
2941c1e0c9eSHao Liu return false;
2951c1e0c9eSHao Liu
296a4b6b1e1SDavid Green // Check if all users of this load are shufflevectors. If we encounter any
297a4b6b1e1SDavid Green // users that are extractelement instructions or binary operators, we save
298a4b6b1e1SDavid Green // them to later check if they can be modified to extract from one of the
299a4b6b1e1SDavid Green // shufflevectors instead of the load.
300a4b6b1e1SDavid Green
3011c1e0c9eSHao Liu SmallVector<ShuffleVectorInst *, 4> Shuffles;
302476c0afcSMatthew Simpson SmallVector<ExtractElementInst *, 4> Extracts;
303a4b6b1e1SDavid Green // BinOpShuffles need to be handled a single time in case both operands of the
304a4b6b1e1SDavid Green // binop are the same load.
305a4b6b1e1SDavid Green SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
3061c1e0c9eSHao Liu
307a4b6b1e1SDavid Green for (auto *User : LI->users()) {
308a4b6b1e1SDavid Green auto *Extract = dyn_cast<ExtractElementInst>(User);
309476c0afcSMatthew Simpson if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
310476c0afcSMatthew Simpson Extracts.push_back(Extract);
311476c0afcSMatthew Simpson continue;
312476c0afcSMatthew Simpson }
31328b41237SDavid Green if (auto *BI = dyn_cast<BinaryOperator>(User)) {
31428b41237SDavid Green if (all_of(BI->users(),
31528b41237SDavid Green [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
31628b41237SDavid Green for (auto *SVI : BI->users())
31728b41237SDavid Green BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
318a4b6b1e1SDavid Green continue;
319a4b6b1e1SDavid Green }
320a4b6b1e1SDavid Green }
321a4b6b1e1SDavid Green auto *SVI = dyn_cast<ShuffleVectorInst>(User);
3221c1e0c9eSHao Liu if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
3231c1e0c9eSHao Liu return false;
3241c1e0c9eSHao Liu
3251c1e0c9eSHao Liu Shuffles.push_back(SVI);
3261c1e0c9eSHao Liu }
3271c1e0c9eSHao Liu
328a4b6b1e1SDavid Green if (Shuffles.empty() && BinOpShuffles.empty())
3291c1e0c9eSHao Liu return false;
3301c1e0c9eSHao Liu
3311c1e0c9eSHao Liu unsigned Factor, Index;
3321c1e0c9eSHao Liu
333ff5b9a7bSChristopher Tetreault unsigned NumLoadElements =
334ff5b9a7bSChristopher Tetreault cast<FixedVectorType>(LI->getType())->getNumElements();
335a4b6b1e1SDavid Green auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
3361c1e0c9eSHao Liu // Check if the first shufflevector is DE-interleave shuffle.
337a4b6b1e1SDavid Green if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
338a4b6b1e1SDavid Green NumLoadElements))
3391c1e0c9eSHao Liu return false;
3401c1e0c9eSHao Liu
3411c1e0c9eSHao Liu // Holds the corresponding index for each DE-interleave shuffle.
3421c1e0c9eSHao Liu SmallVector<unsigned, 4> Indices;
3431c1e0c9eSHao Liu
344a4b6b1e1SDavid Green Type *VecTy = FirstSVI->getType();
3451c1e0c9eSHao Liu
3461c1e0c9eSHao Liu // Check if other shufflevectors are also DE-interleaved of the same type
3471c1e0c9eSHao Liu // and factor as the first shufflevector.
348a4b6b1e1SDavid Green for (auto *Shuffle : Shuffles) {
349a4b6b1e1SDavid Green if (Shuffle->getType() != VecTy)
3501c1e0c9eSHao Liu return false;
351a4b6b1e1SDavid Green if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
3521c1e0c9eSHao Liu Index))
3531c1e0c9eSHao Liu return false;
3541c1e0c9eSHao Liu
3559f2d9364SJuneyoung Lee assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
3561c1e0c9eSHao Liu Indices.push_back(Index);
3571c1e0c9eSHao Liu }
358a4b6b1e1SDavid Green for (auto *Shuffle : BinOpShuffles) {
359a4b6b1e1SDavid Green if (Shuffle->getType() != VecTy)
360a4b6b1e1SDavid Green return false;
361a4b6b1e1SDavid Green if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
362a4b6b1e1SDavid Green Index))
363a4b6b1e1SDavid Green return false;
364a4b6b1e1SDavid Green
3659f2d9364SJuneyoung Lee assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
3669f2d9364SJuneyoung Lee
367a4b6b1e1SDavid Green if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
368a4b6b1e1SDavid Green Indices.push_back(Index);
369a4b6b1e1SDavid Green if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
370a4b6b1e1SDavid Green Indices.push_back(Index);
371a4b6b1e1SDavid Green }
3721c1e0c9eSHao Liu
373476c0afcSMatthew Simpson // Try and modify users of the load that are extractelement instructions to
374476c0afcSMatthew Simpson // use the shufflevector instructions instead of the load.
375476c0afcSMatthew Simpson if (!tryReplaceExtracts(Extracts, Shuffles))
376476c0afcSMatthew Simpson return false;
377ed936aadSFlorian Hahn
378ed936aadSFlorian Hahn bool BinOpShuffleChanged =
379ed936aadSFlorian Hahn replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
380476c0afcSMatthew Simpson
381d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
3821c1e0c9eSHao Liu
3831c1e0c9eSHao Liu // Try to create target specific intrinsics to replace the load and shuffles.
384ed936aadSFlorian Hahn if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
385ed936aadSFlorian Hahn // If Extracts is not empty, tryReplaceExtracts made changes earlier.
386ed936aadSFlorian Hahn return !Extracts.empty() || BinOpShuffleChanged;
387ed936aadSFlorian Hahn }
3881c1e0c9eSHao Liu
3890da15ea5SKazu Hirata append_range(DeadInsts, Shuffles);
3901c1e0c9eSHao Liu
3911c1e0c9eSHao Liu DeadInsts.push_back(LI);
3921c1e0c9eSHao Liu return true;
3931c1e0c9eSHao Liu }
3941c1e0c9eSHao Liu
replaceBinOpShuffles(ArrayRef<ShuffleVectorInst * > BinOpShuffles,SmallVectorImpl<ShuffleVectorInst * > & Shuffles,LoadInst * LI)395ed936aadSFlorian Hahn bool InterleavedAccess::replaceBinOpShuffles(
396a4b6b1e1SDavid Green ArrayRef<ShuffleVectorInst *> BinOpShuffles,
397a4b6b1e1SDavid Green SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
398a4b6b1e1SDavid Green for (auto *SVI : BinOpShuffles) {
399a4b6b1e1SDavid Green BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
4009f2d9364SJuneyoung Lee Type *BIOp0Ty = BI->getOperand(0)->getType();
401a4b6b1e1SDavid Green ArrayRef<int> Mask = SVI->getShuffleMask();
4029f2d9364SJuneyoung Lee assert(all_of(Mask, [&](int Idx) {
4039f2d9364SJuneyoung Lee return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
4049f2d9364SJuneyoung Lee }));
405a4b6b1e1SDavid Green
4069f2d9364SJuneyoung Lee auto *NewSVI1 =
4079f2d9364SJuneyoung Lee new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
4089f2d9364SJuneyoung Lee Mask, SVI->getName(), SVI);
409a4b6b1e1SDavid Green auto *NewSVI2 = new ShuffleVectorInst(
4109f2d9364SJuneyoung Lee BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
411a4b6b1e1SDavid Green SVI->getName(), SVI);
412fda8b471SDavid Green BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
413fda8b471SDavid Green BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
414a4b6b1e1SDavid Green SVI->replaceAllUsesWith(NewBI);
415a4b6b1e1SDavid Green LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
416a4b6b1e1SDavid Green << "\n With : " << *NewSVI1 << "\n And : "
417a4b6b1e1SDavid Green << *NewSVI2 << "\n And : " << *NewBI << "\n");
418a4b6b1e1SDavid Green RecursivelyDeleteTriviallyDeadInstructions(SVI);
419a4b6b1e1SDavid Green if (NewSVI1->getOperand(0) == LI)
420a4b6b1e1SDavid Green Shuffles.push_back(NewSVI1);
421a4b6b1e1SDavid Green if (NewSVI2->getOperand(0) == LI)
422a4b6b1e1SDavid Green Shuffles.push_back(NewSVI2);
423a4b6b1e1SDavid Green }
424ed936aadSFlorian Hahn
425ed936aadSFlorian Hahn return !BinOpShuffles.empty();
426a4b6b1e1SDavid Green }
427a4b6b1e1SDavid Green
tryReplaceExtracts(ArrayRef<ExtractElementInst * > Extracts,ArrayRef<ShuffleVectorInst * > Shuffles)428476c0afcSMatthew Simpson bool InterleavedAccess::tryReplaceExtracts(
429476c0afcSMatthew Simpson ArrayRef<ExtractElementInst *> Extracts,
430476c0afcSMatthew Simpson ArrayRef<ShuffleVectorInst *> Shuffles) {
431476c0afcSMatthew Simpson // If there aren't any extractelement instructions to modify, there's nothing
432476c0afcSMatthew Simpson // to do.
433476c0afcSMatthew Simpson if (Extracts.empty())
434476c0afcSMatthew Simpson return true;
435476c0afcSMatthew Simpson
436476c0afcSMatthew Simpson // Maps extractelement instructions to vector-index pairs. The extractlement
437476c0afcSMatthew Simpson // instructions will be modified to use the new vector and index operands.
438476c0afcSMatthew Simpson DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
439476c0afcSMatthew Simpson
440476c0afcSMatthew Simpson for (auto *Extract : Extracts) {
441476c0afcSMatthew Simpson // The vector index that is extracted.
442476c0afcSMatthew Simpson auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
443476c0afcSMatthew Simpson auto Index = IndexOperand->getSExtValue();
444476c0afcSMatthew Simpson
445476c0afcSMatthew Simpson // Look for a suitable shufflevector instruction. The goal is to modify the
446476c0afcSMatthew Simpson // extractelement instruction (which uses an interleaved load) to use one
447476c0afcSMatthew Simpson // of the shufflevector instructions instead of the load.
448476c0afcSMatthew Simpson for (auto *Shuffle : Shuffles) {
449476c0afcSMatthew Simpson // If the shufflevector instruction doesn't dominate the extract, we
450476c0afcSMatthew Simpson // can't create a use of it.
451476c0afcSMatthew Simpson if (!DT->dominates(Shuffle, Extract))
452476c0afcSMatthew Simpson continue;
453476c0afcSMatthew Simpson
454476c0afcSMatthew Simpson // Inspect the indices of the shufflevector instruction. If the shuffle
455476c0afcSMatthew Simpson // selects the same index that is extracted, we can modify the
456476c0afcSMatthew Simpson // extractelement instruction.
457476c0afcSMatthew Simpson SmallVector<int, 4> Indices;
458476c0afcSMatthew Simpson Shuffle->getShuffleMask(Indices);
459476c0afcSMatthew Simpson for (unsigned I = 0; I < Indices.size(); ++I)
460476c0afcSMatthew Simpson if (Indices[I] == Index) {
461476c0afcSMatthew Simpson assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
462476c0afcSMatthew Simpson "Vector operations do not match");
463476c0afcSMatthew Simpson ReplacementMap[Extract] = std::make_pair(Shuffle, I);
464476c0afcSMatthew Simpson break;
465476c0afcSMatthew Simpson }
466476c0afcSMatthew Simpson
467476c0afcSMatthew Simpson // If we found a suitable shufflevector instruction, stop looking.
468476c0afcSMatthew Simpson if (ReplacementMap.count(Extract))
469476c0afcSMatthew Simpson break;
470476c0afcSMatthew Simpson }
471476c0afcSMatthew Simpson
472476c0afcSMatthew Simpson // If we did not find a suitable shufflevector instruction, the
473476c0afcSMatthew Simpson // extractelement instruction cannot be modified, so we must give up.
474476c0afcSMatthew Simpson if (!ReplacementMap.count(Extract))
475476c0afcSMatthew Simpson return false;
476476c0afcSMatthew Simpson }
477476c0afcSMatthew Simpson
478476c0afcSMatthew Simpson // Finally, perform the replacements.
479476c0afcSMatthew Simpson IRBuilder<> Builder(Extracts[0]->getContext());
480476c0afcSMatthew Simpson for (auto &Replacement : ReplacementMap) {
481476c0afcSMatthew Simpson auto *Extract = Replacement.first;
482476c0afcSMatthew Simpson auto *Vector = Replacement.second.first;
483476c0afcSMatthew Simpson auto Index = Replacement.second.second;
484476c0afcSMatthew Simpson Builder.SetInsertPoint(Extract);
485476c0afcSMatthew Simpson Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
486476c0afcSMatthew Simpson Extract->eraseFromParent();
487476c0afcSMatthew Simpson }
488476c0afcSMatthew Simpson
489476c0afcSMatthew Simpson return true;
490476c0afcSMatthew Simpson }
491476c0afcSMatthew Simpson
lowerInterleavedStore(StoreInst * SI,SmallVector<Instruction *,32> & DeadInsts)4921c1e0c9eSHao Liu bool InterleavedAccess::lowerInterleavedStore(
4931c1e0c9eSHao Liu StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
4941c1e0c9eSHao Liu if (!SI->isSimple())
4951c1e0c9eSHao Liu return false;
4961c1e0c9eSHao Liu
497a4b6b1e1SDavid Green auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
498ff5b9a7bSChristopher Tetreault if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
4991c1e0c9eSHao Liu return false;
5001c1e0c9eSHao Liu
5011c1e0c9eSHao Liu // Check if the shufflevector is RE-interleave shuffle.
5021c1e0c9eSHao Liu unsigned Factor;
503889f6606SChristopher Tetreault unsigned OpNumElts =
504ff5b9a7bSChristopher Tetreault cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
50501fa9622SMatthias Braun if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
5061c1e0c9eSHao Liu return false;
5071c1e0c9eSHao Liu
508d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
5091c1e0c9eSHao Liu
5101c1e0c9eSHao Liu // Try to create target specific intrinsics to replace the store and shuffle.
5111c1e0c9eSHao Liu if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
5121c1e0c9eSHao Liu return false;
5131c1e0c9eSHao Liu
5141c1e0c9eSHao Liu // Already have a new target specific interleaved store. Erase the old store.
5151c1e0c9eSHao Liu DeadInsts.push_back(SI);
5161c1e0c9eSHao Liu DeadInsts.push_back(SVI);
5171c1e0c9eSHao Liu return true;
5181c1e0c9eSHao Liu }
5191c1e0c9eSHao Liu
runOnFunction(Function & F)5201c1e0c9eSHao Liu bool InterleavedAccess::runOnFunction(Function &F) {
5218b61764cSFrancis Visoiu Mistrih auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
5228b61764cSFrancis Visoiu Mistrih if (!TPC || !LowerInterleavedAccesses)
5231c1e0c9eSHao Liu return false;
5241c1e0c9eSHao Liu
525d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
5261c1e0c9eSHao Liu
527476c0afcSMatthew Simpson DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5288b61764cSFrancis Visoiu Mistrih auto &TM = TPC->getTM<TargetMachine>();
5298b61764cSFrancis Visoiu Mistrih TLI = TM.getSubtargetImpl(F)->getTargetLowering();
5301c1e0c9eSHao Liu MaxFactor = TLI->getMaxSupportedInterleaveFactor();
5311c1e0c9eSHao Liu
5321c1e0c9eSHao Liu // Holds dead instructions that will be erased later.
5331c1e0c9eSHao Liu SmallVector<Instruction *, 32> DeadInsts;
5341c1e0c9eSHao Liu bool Changed = false;
5351c1e0c9eSHao Liu
53678199518SNico Rieck for (auto &I : instructions(F)) {
537a4b6b1e1SDavid Green if (auto *LI = dyn_cast<LoadInst>(&I))
5381c1e0c9eSHao Liu Changed |= lowerInterleavedLoad(LI, DeadInsts);
5391c1e0c9eSHao Liu
540a4b6b1e1SDavid Green if (auto *SI = dyn_cast<StoreInst>(&I))
5411c1e0c9eSHao Liu Changed |= lowerInterleavedStore(SI, DeadInsts);
5421c1e0c9eSHao Liu }
5431c1e0c9eSHao Liu
544*9e6d1f4bSKazu Hirata for (auto *I : DeadInsts)
5451c1e0c9eSHao Liu I->eraseFromParent();
5461c1e0c9eSHao Liu
5471c1e0c9eSHao Liu return Changed;
5481c1e0c9eSHao Liu }
549