//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks, that deal with the elements one-by-one if the
// appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//
1509f2f960SAnna Thomas
1629356e32SAnna Thomas #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
1709f2f960SAnna Thomas #include "llvm/ADT/Twine.h"
1805638592SRoman Lebedev #include "llvm/Analysis/DomTreeUpdater.h"
1909f2f960SAnna Thomas #include "llvm/Analysis/TargetTransformInfo.h"
2009f2f960SAnna Thomas #include "llvm/IR/BasicBlock.h"
2109f2f960SAnna Thomas #include "llvm/IR/Constant.h"
2209f2f960SAnna Thomas #include "llvm/IR/Constants.h"
2309f2f960SAnna Thomas #include "llvm/IR/DerivedTypes.h"
2405638592SRoman Lebedev #include "llvm/IR/Dominators.h"
2509f2f960SAnna Thomas #include "llvm/IR/Function.h"
2609f2f960SAnna Thomas #include "llvm/IR/IRBuilder.h"
2709f2f960SAnna Thomas #include "llvm/IR/Instruction.h"
2809f2f960SAnna Thomas #include "llvm/IR/Instructions.h"
2909f2f960SAnna Thomas #include "llvm/IR/IntrinsicInst.h"
3009f2f960SAnna Thomas #include "llvm/IR/Type.h"
3109f2f960SAnna Thomas #include "llvm/IR/Value.h"
3209f2f960SAnna Thomas #include "llvm/InitializePasses.h"
3309f2f960SAnna Thomas #include "llvm/Pass.h"
3409f2f960SAnna Thomas #include "llvm/Support/Casting.h"
355f18e2f3SBenjamin Kramer #include "llvm/Transforms/Scalar.h"
360ea45a41SRoman Lebedev #include "llvm/Transforms/Utils/BasicBlockUtils.h"
3709f2f960SAnna Thomas #include <cassert>
3809f2f960SAnna Thomas
3909f2f960SAnna Thomas using namespace llvm;
4009f2f960SAnna Thomas
4109f2f960SAnna Thomas #define DEBUG_TYPE "scalarize-masked-mem-intrin"
4209f2f960SAnna Thomas
4309f2f960SAnna Thomas namespace {
4409f2f960SAnna Thomas
4529356e32SAnna Thomas class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
4609f2f960SAnna Thomas public:
4709f2f960SAnna Thomas static char ID; // Pass identification, replacement for typeid
4809f2f960SAnna Thomas
ScalarizeMaskedMemIntrinLegacyPass()4929356e32SAnna Thomas explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
5029356e32SAnna Thomas initializeScalarizeMaskedMemIntrinLegacyPassPass(
5129356e32SAnna Thomas *PassRegistry::getPassRegistry());
5209f2f960SAnna Thomas }
5309f2f960SAnna Thomas
5409f2f960SAnna Thomas bool runOnFunction(Function &F) override;
5509f2f960SAnna Thomas
getPassName() const5609f2f960SAnna Thomas StringRef getPassName() const override {
5709f2f960SAnna Thomas return "Scalarize Masked Memory Intrinsics";
5809f2f960SAnna Thomas }
5909f2f960SAnna Thomas
getAnalysisUsage(AnalysisUsage & AU) const6009f2f960SAnna Thomas void getAnalysisUsage(AnalysisUsage &AU) const override {
6109f2f960SAnna Thomas AU.addRequired<TargetTransformInfoWrapperPass>();
6205638592SRoman Lebedev AU.addPreserved<DominatorTreeWrapperPass>();
6309f2f960SAnna Thomas }
6409f2f960SAnna Thomas };
6509f2f960SAnna Thomas
6609f2f960SAnna Thomas } // end anonymous namespace
6709f2f960SAnna Thomas
6809f2f960SAnna Thomas static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
6905638592SRoman Lebedev const TargetTransformInfo &TTI, const DataLayout &DL,
7005638592SRoman Lebedev DomTreeUpdater *DTU);
7109f2f960SAnna Thomas static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
7209f2f960SAnna Thomas const TargetTransformInfo &TTI,
7305638592SRoman Lebedev const DataLayout &DL, DomTreeUpdater *DTU);
7409f2f960SAnna Thomas
7529356e32SAnna Thomas char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
7609f2f960SAnna Thomas
777113de30SMariya Podchishchaeva INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
787113de30SMariya Podchishchaeva "Scalarize unsupported masked memory intrinsics", false,
797113de30SMariya Podchishchaeva false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)807113de30SMariya Podchishchaeva INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
8105638592SRoman Lebedev INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
827113de30SMariya Podchishchaeva INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
837113de30SMariya Podchishchaeva "Scalarize unsupported masked memory intrinsics", false,
847113de30SMariya Podchishchaeva false)
8509f2f960SAnna Thomas
8629356e32SAnna Thomas FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
8729356e32SAnna Thomas return new ScalarizeMaskedMemIntrinLegacyPass();
8809f2f960SAnna Thomas }
8909f2f960SAnna Thomas
isConstantIntVector(Value * Mask)9009f2f960SAnna Thomas static bool isConstantIntVector(Value *Mask) {
9109f2f960SAnna Thomas Constant *C = dyn_cast<Constant>(Mask);
9209f2f960SAnna Thomas if (!C)
9309f2f960SAnna Thomas return false;
9409f2f960SAnna Thomas
9509f2f960SAnna Thomas unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
9609f2f960SAnna Thomas for (unsigned i = 0; i != NumElts; ++i) {
9709f2f960SAnna Thomas Constant *CElt = C->getAggregateElement(i);
9809f2f960SAnna Thomas if (!CElt || !isa<ConstantInt>(CElt))
9909f2f960SAnna Thomas return false;
10009f2f960SAnna Thomas }
10109f2f960SAnna Thomas
10209f2f960SAnna Thomas return true;
10309f2f960SAnna Thomas }
10409f2f960SAnna Thomas
adjustForEndian(const DataLayout & DL,unsigned VectorWidth,unsigned Idx)1059498315cSMarkus Lavin static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
1069498315cSMarkus Lavin unsigned Idx) {
1079498315cSMarkus Lavin return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
1089498315cSMarkus Lavin }
1099498315cSMarkus Lavin
11009f2f960SAnna Thomas // Translate a masked load intrinsic like
11109f2f960SAnna Thomas // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
11209f2f960SAnna Thomas // <16 x i1> %mask, <16 x i32> %passthru)
11309f2f960SAnna Thomas // to a chain of basic blocks, with loading element one-by-one if
11409f2f960SAnna Thomas // the appropriate mask bit is set
11509f2f960SAnna Thomas //
11609f2f960SAnna Thomas // %1 = bitcast i8* %addr to i32*
11709f2f960SAnna Thomas // %2 = extractelement <16 x i1> %mask, i32 0
11809f2f960SAnna Thomas // br i1 %2, label %cond.load, label %else
11909f2f960SAnna Thomas //
12009f2f960SAnna Thomas // cond.load: ; preds = %0
12109f2f960SAnna Thomas // %3 = getelementptr i32* %1, i32 0
12209f2f960SAnna Thomas // %4 = load i32* %3
12309f2f960SAnna Thomas // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
12409f2f960SAnna Thomas // br label %else
12509f2f960SAnna Thomas //
12609f2f960SAnna Thomas // else: ; preds = %0, %cond.load
12709f2f960SAnna Thomas // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
12809f2f960SAnna Thomas // %6 = extractelement <16 x i1> %mask, i32 1
12909f2f960SAnna Thomas // br i1 %6, label %cond.load1, label %else2
13009f2f960SAnna Thomas //
13109f2f960SAnna Thomas // cond.load1: ; preds = %else
13209f2f960SAnna Thomas // %7 = getelementptr i32* %1, i32 1
13309f2f960SAnna Thomas // %8 = load i32* %7
13409f2f960SAnna Thomas // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
13509f2f960SAnna Thomas // br label %else2
13609f2f960SAnna Thomas //
13709f2f960SAnna Thomas // else2: ; preds = %else, %cond.load1
13809f2f960SAnna Thomas // %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
13909f2f960SAnna Thomas // %10 = extractelement <16 x i1> %mask, i32 2
14009f2f960SAnna Thomas // br i1 %10, label %cond.load4, label %else5
14109f2f960SAnna Thomas //
scalarizeMaskedLoad(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)1429498315cSMarkus Lavin static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
1439498315cSMarkus Lavin DomTreeUpdater *DTU, bool &ModifiedDT) {
14409f2f960SAnna Thomas Value *Ptr = CI->getArgOperand(0);
14509f2f960SAnna Thomas Value *Alignment = CI->getArgOperand(1);
14609f2f960SAnna Thomas Value *Mask = CI->getArgOperand(2);
14709f2f960SAnna Thomas Value *Src0 = CI->getArgOperand(3);
14809f2f960SAnna Thomas
14909f2f960SAnna Thomas const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
15009f2f960SAnna Thomas VectorType *VecType = cast<FixedVectorType>(CI->getType());
15109f2f960SAnna Thomas
15209f2f960SAnna Thomas Type *EltTy = VecType->getElementType();
15309f2f960SAnna Thomas
15409f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
15509f2f960SAnna Thomas Instruction *InsertPt = CI;
15609f2f960SAnna Thomas BasicBlock *IfBlock = CI->getParent();
15709f2f960SAnna Thomas
15809f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
15909f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
16009f2f960SAnna Thomas
16109f2f960SAnna Thomas // Short-cut if the mask is all-true.
16209f2f960SAnna Thomas if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
16309f2f960SAnna Thomas Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
16409f2f960SAnna Thomas CI->replaceAllUsesWith(NewI);
16509f2f960SAnna Thomas CI->eraseFromParent();
16609f2f960SAnna Thomas return;
16709f2f960SAnna Thomas }
16809f2f960SAnna Thomas
16909f2f960SAnna Thomas // Adjust alignment for the scalar instruction.
17009f2f960SAnna Thomas const Align AdjustedAlignVal =
17109f2f960SAnna Thomas commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
17209f2f960SAnna Thomas // Bitcast %addr from i8* to EltTy*
17309f2f960SAnna Thomas Type *NewPtrType =
17409f2f960SAnna Thomas EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
17509f2f960SAnna Thomas Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
17609f2f960SAnna Thomas unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
17709f2f960SAnna Thomas
17809f2f960SAnna Thomas // The result vector
17909f2f960SAnna Thomas Value *VResult = Src0;
18009f2f960SAnna Thomas
18109f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
18209f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
18309f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
18409f2f960SAnna Thomas continue;
18509f2f960SAnna Thomas Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
18609f2f960SAnna Thomas LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
18709f2f960SAnna Thomas VResult = Builder.CreateInsertElement(VResult, Load, Idx);
18809f2f960SAnna Thomas }
18909f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
19009f2f960SAnna Thomas CI->eraseFromParent();
19109f2f960SAnna Thomas return;
19209f2f960SAnna Thomas }
19309f2f960SAnna Thomas
19409f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
19509f2f960SAnna Thomas // better results on X86 at least.
19609f2f960SAnna Thomas Value *SclrMask;
19709f2f960SAnna Thomas if (VectorWidth != 1) {
19809f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
19909f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
20009f2f960SAnna Thomas }
20109f2f960SAnna Thomas
20209f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
20309f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
20409f2f960SAnna Thomas //
20509f2f960SAnna Thomas // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
20609f2f960SAnna Thomas // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
20709f2f960SAnna Thomas // %cond = icmp ne i16 %mask_1, 0
20809f2f960SAnna Thomas // br i1 %mask_1, label %cond.load, label %else
20909f2f960SAnna Thomas //
21009f2f960SAnna Thomas Value *Predicate;
21109f2f960SAnna Thomas if (VectorWidth != 1) {
2129498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
2139498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
21409f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
21509f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
21609f2f960SAnna Thomas } else {
21709f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx);
21809f2f960SAnna Thomas }
21909f2f960SAnna Thomas
22009f2f960SAnna Thomas // Create "cond" block
22109f2f960SAnna Thomas //
22209f2f960SAnna Thomas // %EltAddr = getelementptr i32* %1, i32 0
22309f2f960SAnna Thomas // %Elt = load i32* %EltAddr
22409f2f960SAnna Thomas // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
22509f2f960SAnna Thomas //
2260ea45a41SRoman Lebedev Instruction *ThenTerm =
22705638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
22805638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
22909f2f960SAnna Thomas
2300ea45a41SRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
2310ea45a41SRoman Lebedev CondBlock->setName("cond.load");
2320ea45a41SRoman Lebedev
2330ea45a41SRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
23409f2f960SAnna Thomas Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
23509f2f960SAnna Thomas LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
23609f2f960SAnna Thomas Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
23709f2f960SAnna Thomas
23809f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
2390ea45a41SRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
2400ea45a41SRoman Lebedev NewIfBlock->setName("else");
24109f2f960SAnna Thomas BasicBlock *PrevIfBlock = IfBlock;
24209f2f960SAnna Thomas IfBlock = NewIfBlock;
24309f2f960SAnna Thomas
24409f2f960SAnna Thomas // Create the phi to join the new and previous value.
2450ea45a41SRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
24609f2f960SAnna Thomas PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
24709f2f960SAnna Thomas Phi->addIncoming(NewVResult, CondBlock);
24809f2f960SAnna Thomas Phi->addIncoming(VResult, PrevIfBlock);
24909f2f960SAnna Thomas VResult = Phi;
25009f2f960SAnna Thomas }
25109f2f960SAnna Thomas
25209f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
25309f2f960SAnna Thomas CI->eraseFromParent();
25409f2f960SAnna Thomas
25509f2f960SAnna Thomas ModifiedDT = true;
25609f2f960SAnna Thomas }
25709f2f960SAnna Thomas
25809f2f960SAnna Thomas // Translate a masked store intrinsic, like
25909f2f960SAnna Thomas // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
26009f2f960SAnna Thomas // <16 x i1> %mask)
26109f2f960SAnna Thomas // to a chain of basic blocks, that stores element one-by-one if
26209f2f960SAnna Thomas // the appropriate mask bit is set
26309f2f960SAnna Thomas //
26409f2f960SAnna Thomas // %1 = bitcast i8* %addr to i32*
26509f2f960SAnna Thomas // %2 = extractelement <16 x i1> %mask, i32 0
26609f2f960SAnna Thomas // br i1 %2, label %cond.store, label %else
26709f2f960SAnna Thomas //
26809f2f960SAnna Thomas // cond.store: ; preds = %0
26909f2f960SAnna Thomas // %3 = extractelement <16 x i32> %val, i32 0
27009f2f960SAnna Thomas // %4 = getelementptr i32* %1, i32 0
27109f2f960SAnna Thomas // store i32 %3, i32* %4
27209f2f960SAnna Thomas // br label %else
27309f2f960SAnna Thomas //
27409f2f960SAnna Thomas // else: ; preds = %0, %cond.store
27509f2f960SAnna Thomas // %5 = extractelement <16 x i1> %mask, i32 1
27609f2f960SAnna Thomas // br i1 %5, label %cond.store1, label %else2
27709f2f960SAnna Thomas //
27809f2f960SAnna Thomas // cond.store1: ; preds = %else
27909f2f960SAnna Thomas // %6 = extractelement <16 x i32> %val, i32 1
28009f2f960SAnna Thomas // %7 = getelementptr i32* %1, i32 1
28109f2f960SAnna Thomas // store i32 %6, i32* %7
28209f2f960SAnna Thomas // br label %else2
28309f2f960SAnna Thomas // . . .
scalarizeMaskedStore(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)2849498315cSMarkus Lavin static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
2859498315cSMarkus Lavin DomTreeUpdater *DTU, bool &ModifiedDT) {
28609f2f960SAnna Thomas Value *Src = CI->getArgOperand(0);
28709f2f960SAnna Thomas Value *Ptr = CI->getArgOperand(1);
28809f2f960SAnna Thomas Value *Alignment = CI->getArgOperand(2);
28909f2f960SAnna Thomas Value *Mask = CI->getArgOperand(3);
29009f2f960SAnna Thomas
29109f2f960SAnna Thomas const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
29209f2f960SAnna Thomas auto *VecType = cast<VectorType>(Src->getType());
29309f2f960SAnna Thomas
29409f2f960SAnna Thomas Type *EltTy = VecType->getElementType();
29509f2f960SAnna Thomas
29609f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
29709f2f960SAnna Thomas Instruction *InsertPt = CI;
29809f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
29909f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
30009f2f960SAnna Thomas
30109f2f960SAnna Thomas // Short-cut if the mask is all-true.
30209f2f960SAnna Thomas if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
30309f2f960SAnna Thomas Builder.CreateAlignedStore(Src, Ptr, AlignVal);
30409f2f960SAnna Thomas CI->eraseFromParent();
30509f2f960SAnna Thomas return;
30609f2f960SAnna Thomas }
30709f2f960SAnna Thomas
30809f2f960SAnna Thomas // Adjust alignment for the scalar instruction.
30909f2f960SAnna Thomas const Align AdjustedAlignVal =
31009f2f960SAnna Thomas commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
31109f2f960SAnna Thomas // Bitcast %addr from i8* to EltTy*
31209f2f960SAnna Thomas Type *NewPtrType =
31309f2f960SAnna Thomas EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
31409f2f960SAnna Thomas Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
31509f2f960SAnna Thomas unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
31609f2f960SAnna Thomas
31709f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
31809f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
31909f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
32009f2f960SAnna Thomas continue;
32109f2f960SAnna Thomas Value *OneElt = Builder.CreateExtractElement(Src, Idx);
32209f2f960SAnna Thomas Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
32309f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
32409f2f960SAnna Thomas }
32509f2f960SAnna Thomas CI->eraseFromParent();
32609f2f960SAnna Thomas return;
32709f2f960SAnna Thomas }
32809f2f960SAnna Thomas
32909f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
33009f2f960SAnna Thomas // better results on X86 at least.
33109f2f960SAnna Thomas Value *SclrMask;
33209f2f960SAnna Thomas if (VectorWidth != 1) {
33309f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
33409f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
33509f2f960SAnna Thomas }
33609f2f960SAnna Thomas
33709f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
33809f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
33909f2f960SAnna Thomas //
34009f2f960SAnna Thomas // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
34109f2f960SAnna Thomas // %cond = icmp ne i16 %mask_1, 0
34209f2f960SAnna Thomas // br i1 %mask_1, label %cond.store, label %else
34309f2f960SAnna Thomas //
34409f2f960SAnna Thomas Value *Predicate;
34509f2f960SAnna Thomas if (VectorWidth != 1) {
3469498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
3479498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
34809f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
34909f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
35009f2f960SAnna Thomas } else {
35109f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx);
35209f2f960SAnna Thomas }
35309f2f960SAnna Thomas
35409f2f960SAnna Thomas // Create "cond" block
35509f2f960SAnna Thomas //
35609f2f960SAnna Thomas // %OneElt = extractelement <16 x i32> %Src, i32 Idx
35709f2f960SAnna Thomas // %EltAddr = getelementptr i32* %1, i32 0
35809f2f960SAnna Thomas // %store i32 %OneElt, i32* %EltAddr
35909f2f960SAnna Thomas //
36022b84211SRoman Lebedev Instruction *ThenTerm =
36105638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
36205638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
36309f2f960SAnna Thomas
36422b84211SRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
36522b84211SRoman Lebedev CondBlock->setName("cond.store");
36622b84211SRoman Lebedev
36722b84211SRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
36809f2f960SAnna Thomas Value *OneElt = Builder.CreateExtractElement(Src, Idx);
36909f2f960SAnna Thomas Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
37009f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
37109f2f960SAnna Thomas
37209f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
37322b84211SRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
37422b84211SRoman Lebedev NewIfBlock->setName("else");
37522b84211SRoman Lebedev
37622b84211SRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
37709f2f960SAnna Thomas }
37809f2f960SAnna Thomas CI->eraseFromParent();
37909f2f960SAnna Thomas
38009f2f960SAnna Thomas ModifiedDT = true;
38109f2f960SAnna Thomas }
38209f2f960SAnna Thomas
38309f2f960SAnna Thomas // Translate a masked gather intrinsic like
38409f2f960SAnna Thomas // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
38509f2f960SAnna Thomas // <16 x i1> %Mask, <16 x i32> %Src)
38609f2f960SAnna Thomas // to a chain of basic blocks, with loading element one-by-one if
38709f2f960SAnna Thomas // the appropriate mask bit is set
38809f2f960SAnna Thomas //
38909f2f960SAnna Thomas // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
39009f2f960SAnna Thomas // %Mask0 = extractelement <16 x i1> %Mask, i32 0
39109f2f960SAnna Thomas // br i1 %Mask0, label %cond.load, label %else
39209f2f960SAnna Thomas //
39309f2f960SAnna Thomas // cond.load:
39409f2f960SAnna Thomas // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
39509f2f960SAnna Thomas // %Load0 = load i32, i32* %Ptr0, align 4
39609f2f960SAnna Thomas // %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
39709f2f960SAnna Thomas // br label %else
39809f2f960SAnna Thomas //
39909f2f960SAnna Thomas // else:
40009f2f960SAnna Thomas // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
40109f2f960SAnna Thomas // %Mask1 = extractelement <16 x i1> %Mask, i32 1
40209f2f960SAnna Thomas // br i1 %Mask1, label %cond.load1, label %else2
40309f2f960SAnna Thomas //
40409f2f960SAnna Thomas // cond.load1:
40509f2f960SAnna Thomas // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
40609f2f960SAnna Thomas // %Load1 = load i32, i32* %Ptr1, align 4
40709f2f960SAnna Thomas // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
40809f2f960SAnna Thomas // br label %else2
40909f2f960SAnna Thomas // . . .
41009f2f960SAnna Thomas // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
41109f2f960SAnna Thomas // ret <16 x i32> %Result
scalarizeMaskedGather(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)4129498315cSMarkus Lavin static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
4139498315cSMarkus Lavin DomTreeUpdater *DTU, bool &ModifiedDT) {
41409f2f960SAnna Thomas Value *Ptrs = CI->getArgOperand(0);
41509f2f960SAnna Thomas Value *Alignment = CI->getArgOperand(1);
41609f2f960SAnna Thomas Value *Mask = CI->getArgOperand(2);
41709f2f960SAnna Thomas Value *Src0 = CI->getArgOperand(3);
41809f2f960SAnna Thomas
41909f2f960SAnna Thomas auto *VecType = cast<FixedVectorType>(CI->getType());
42009f2f960SAnna Thomas Type *EltTy = VecType->getElementType();
42109f2f960SAnna Thomas
42209f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
42309f2f960SAnna Thomas Instruction *InsertPt = CI;
42409f2f960SAnna Thomas BasicBlock *IfBlock = CI->getParent();
42509f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
42609f2f960SAnna Thomas MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
42709f2f960SAnna Thomas
42809f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
42909f2f960SAnna Thomas
43009f2f960SAnna Thomas // The result vector
43109f2f960SAnna Thomas Value *VResult = Src0;
43209f2f960SAnna Thomas unsigned VectorWidth = VecType->getNumElements();
43309f2f960SAnna Thomas
43409f2f960SAnna Thomas // Shorten the way if the mask is a vector of constants.
43509f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
43609f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
43709f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
43809f2f960SAnna Thomas continue;
43909f2f960SAnna Thomas Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
44009f2f960SAnna Thomas LoadInst *Load =
44109f2f960SAnna Thomas Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
44209f2f960SAnna Thomas VResult =
44309f2f960SAnna Thomas Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
44409f2f960SAnna Thomas }
44509f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
44609f2f960SAnna Thomas CI->eraseFromParent();
44709f2f960SAnna Thomas return;
44809f2f960SAnna Thomas }
44909f2f960SAnna Thomas
45009f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
45109f2f960SAnna Thomas // better results on X86 at least.
45209f2f960SAnna Thomas Value *SclrMask;
45309f2f960SAnna Thomas if (VectorWidth != 1) {
45409f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
45509f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
45609f2f960SAnna Thomas }
45709f2f960SAnna Thomas
45809f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
45909f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
46009f2f960SAnna Thomas //
46109f2f960SAnna Thomas // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
46209f2f960SAnna Thomas // %cond = icmp ne i16 %mask_1, 0
46309f2f960SAnna Thomas // br i1 %Mask1, label %cond.load, label %else
46409f2f960SAnna Thomas //
46509f2f960SAnna Thomas
46609f2f960SAnna Thomas Value *Predicate;
46709f2f960SAnna Thomas if (VectorWidth != 1) {
4689498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
4699498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
47009f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
47109f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
47209f2f960SAnna Thomas } else {
47309f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
47409f2f960SAnna Thomas }
47509f2f960SAnna Thomas
47609f2f960SAnna Thomas // Create "cond" block
47709f2f960SAnna Thomas //
47809f2f960SAnna Thomas // %EltAddr = getelementptr i32* %1, i32 0
47909f2f960SAnna Thomas // %Elt = load i32* %EltAddr
48009f2f960SAnna Thomas // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
48109f2f960SAnna Thomas //
4821356399aSRoman Lebedev Instruction *ThenTerm =
48305638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
48405638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
48509f2f960SAnna Thomas
4861356399aSRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
4871356399aSRoman Lebedev CondBlock->setName("cond.load");
4881356399aSRoman Lebedev
4891356399aSRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
49009f2f960SAnna Thomas Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
49109f2f960SAnna Thomas LoadInst *Load =
49209f2f960SAnna Thomas Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
49309f2f960SAnna Thomas Value *NewVResult =
49409f2f960SAnna Thomas Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
49509f2f960SAnna Thomas
49609f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
4971356399aSRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
4981356399aSRoman Lebedev NewIfBlock->setName("else");
49909f2f960SAnna Thomas BasicBlock *PrevIfBlock = IfBlock;
50009f2f960SAnna Thomas IfBlock = NewIfBlock;
50109f2f960SAnna Thomas
5021356399aSRoman Lebedev // Create the phi to join the new and previous value.
5031356399aSRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
50409f2f960SAnna Thomas PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
50509f2f960SAnna Thomas Phi->addIncoming(NewVResult, CondBlock);
50609f2f960SAnna Thomas Phi->addIncoming(VResult, PrevIfBlock);
50709f2f960SAnna Thomas VResult = Phi;
50809f2f960SAnna Thomas }
50909f2f960SAnna Thomas
51009f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
51109f2f960SAnna Thomas CI->eraseFromParent();
51209f2f960SAnna Thomas
51309f2f960SAnna Thomas ModifiedDT = true;
51409f2f960SAnna Thomas }
51509f2f960SAnna Thomas
51609f2f960SAnna Thomas // Translate a masked scatter intrinsic, like
51709f2f960SAnna Thomas // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
51809f2f960SAnna Thomas // <16 x i1> %Mask)
51909f2f960SAnna Thomas // to a chain of basic blocks, that stores element one-by-one if
52009f2f960SAnna Thomas // the appropriate mask bit is set.
52109f2f960SAnna Thomas //
52209f2f960SAnna Thomas // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
52309f2f960SAnna Thomas // %Mask0 = extractelement <16 x i1> %Mask, i32 0
52409f2f960SAnna Thomas // br i1 %Mask0, label %cond.store, label %else
52509f2f960SAnna Thomas //
52609f2f960SAnna Thomas // cond.store:
52709f2f960SAnna Thomas // %Elt0 = extractelement <16 x i32> %Src, i32 0
52809f2f960SAnna Thomas // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
52909f2f960SAnna Thomas // store i32 %Elt0, i32* %Ptr0, align 4
53009f2f960SAnna Thomas // br label %else
53109f2f960SAnna Thomas //
53209f2f960SAnna Thomas // else:
53309f2f960SAnna Thomas // %Mask1 = extractelement <16 x i1> %Mask, i32 1
53409f2f960SAnna Thomas // br i1 %Mask1, label %cond.store1, label %else2
53509f2f960SAnna Thomas //
53609f2f960SAnna Thomas // cond.store1:
53709f2f960SAnna Thomas // %Elt1 = extractelement <16 x i32> %Src, i32 1
53809f2f960SAnna Thomas // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
53909f2f960SAnna Thomas // store i32 %Elt1, i32* %Ptr1, align 4
54009f2f960SAnna Thomas // br label %else2
54109f2f960SAnna Thomas // . . .
scalarizeMaskedScatter(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)5429498315cSMarkus Lavin static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
5439498315cSMarkus Lavin DomTreeUpdater *DTU, bool &ModifiedDT) {
54409f2f960SAnna Thomas Value *Src = CI->getArgOperand(0);
54509f2f960SAnna Thomas Value *Ptrs = CI->getArgOperand(1);
54609f2f960SAnna Thomas Value *Alignment = CI->getArgOperand(2);
54709f2f960SAnna Thomas Value *Mask = CI->getArgOperand(3);
54809f2f960SAnna Thomas
54909f2f960SAnna Thomas auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
55009f2f960SAnna Thomas
55109f2f960SAnna Thomas assert(
55209f2f960SAnna Thomas isa<VectorType>(Ptrs->getType()) &&
55309f2f960SAnna Thomas isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
55409f2f960SAnna Thomas "Vector of pointers is expected in masked scatter intrinsic");
55509f2f960SAnna Thomas
55609f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
55709f2f960SAnna Thomas Instruction *InsertPt = CI;
55809f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
55909f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
56009f2f960SAnna Thomas
56109f2f960SAnna Thomas MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
56209f2f960SAnna Thomas unsigned VectorWidth = SrcFVTy->getNumElements();
56309f2f960SAnna Thomas
56409f2f960SAnna Thomas // Shorten the way if the mask is a vector of constants.
56509f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
56609f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
56709f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
56809f2f960SAnna Thomas continue;
56909f2f960SAnna Thomas Value *OneElt =
57009f2f960SAnna Thomas Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
57109f2f960SAnna Thomas Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
57209f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
57309f2f960SAnna Thomas }
57409f2f960SAnna Thomas CI->eraseFromParent();
57509f2f960SAnna Thomas return;
57609f2f960SAnna Thomas }
57709f2f960SAnna Thomas
57809f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
57909f2f960SAnna Thomas // better results on X86 at least.
58009f2f960SAnna Thomas Value *SclrMask;
58109f2f960SAnna Thomas if (VectorWidth != 1) {
58209f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
58309f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
58409f2f960SAnna Thomas }
58509f2f960SAnna Thomas
58609f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
58709f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
58809f2f960SAnna Thomas //
58909f2f960SAnna Thomas // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
59009f2f960SAnna Thomas // %cond = icmp ne i16 %mask_1, 0
59109f2f960SAnna Thomas // br i1 %Mask1, label %cond.store, label %else
59209f2f960SAnna Thomas //
59309f2f960SAnna Thomas Value *Predicate;
59409f2f960SAnna Thomas if (VectorWidth != 1) {
5959498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
5969498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
59709f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
59809f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
59909f2f960SAnna Thomas } else {
60009f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
60109f2f960SAnna Thomas }
60209f2f960SAnna Thomas
60309f2f960SAnna Thomas // Create "cond" block
60409f2f960SAnna Thomas //
60509f2f960SAnna Thomas // %Elt1 = extractelement <16 x i32> %Src, i32 1
60609f2f960SAnna Thomas // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
60709f2f960SAnna Thomas // %store i32 %Elt1, i32* %Ptr1
60809f2f960SAnna Thomas //
609e8efc03aSRoman Lebedev Instruction *ThenTerm =
61005638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
61105638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
61209f2f960SAnna Thomas
613e8efc03aSRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
614e8efc03aSRoman Lebedev CondBlock->setName("cond.store");
615e8efc03aSRoman Lebedev
616e8efc03aSRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
61709f2f960SAnna Thomas Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
61809f2f960SAnna Thomas Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
61909f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
62009f2f960SAnna Thomas
62109f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
622e8efc03aSRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
623e8efc03aSRoman Lebedev NewIfBlock->setName("else");
624e8efc03aSRoman Lebedev
625e8efc03aSRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
62609f2f960SAnna Thomas }
62709f2f960SAnna Thomas CI->eraseFromParent();
62809f2f960SAnna Thomas
62909f2f960SAnna Thomas ModifiedDT = true;
63009f2f960SAnna Thomas }
63109f2f960SAnna Thomas
scalarizeMaskedExpandLoad(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)6329498315cSMarkus Lavin static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
6339498315cSMarkus Lavin DomTreeUpdater *DTU, bool &ModifiedDT) {
63409f2f960SAnna Thomas Value *Ptr = CI->getArgOperand(0);
63509f2f960SAnna Thomas Value *Mask = CI->getArgOperand(1);
63609f2f960SAnna Thomas Value *PassThru = CI->getArgOperand(2);
63709f2f960SAnna Thomas
63809f2f960SAnna Thomas auto *VecType = cast<FixedVectorType>(CI->getType());
63909f2f960SAnna Thomas
64009f2f960SAnna Thomas Type *EltTy = VecType->getElementType();
64109f2f960SAnna Thomas
64209f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
64309f2f960SAnna Thomas Instruction *InsertPt = CI;
64409f2f960SAnna Thomas BasicBlock *IfBlock = CI->getParent();
64509f2f960SAnna Thomas
64609f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
64709f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
64809f2f960SAnna Thomas
64909f2f960SAnna Thomas unsigned VectorWidth = VecType->getNumElements();
65009f2f960SAnna Thomas
65109f2f960SAnna Thomas // The result vector
65209f2f960SAnna Thomas Value *VResult = PassThru;
65309f2f960SAnna Thomas
65409f2f960SAnna Thomas // Shorten the way if the mask is a vector of constants.
65509f2f960SAnna Thomas // Create a build_vector pattern, with loads/undefs as necessary and then
65609f2f960SAnna Thomas // shuffle blend with the pass through value.
65709f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
65809f2f960SAnna Thomas unsigned MemIndex = 0;
65909f2f960SAnna Thomas VResult = UndefValue::get(VecType);
66009f2f960SAnna Thomas SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
66109f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
66209f2f960SAnna Thomas Value *InsertElt;
66309f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
66409f2f960SAnna Thomas InsertElt = UndefValue::get(EltTy);
66509f2f960SAnna Thomas ShuffleMask[Idx] = Idx + VectorWidth;
66609f2f960SAnna Thomas } else {
66709f2f960SAnna Thomas Value *NewPtr =
66809f2f960SAnna Thomas Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
66909f2f960SAnna Thomas InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
67009f2f960SAnna Thomas "Load" + Twine(Idx));
67109f2f960SAnna Thomas ShuffleMask[Idx] = Idx;
67209f2f960SAnna Thomas ++MemIndex;
67309f2f960SAnna Thomas }
67409f2f960SAnna Thomas VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
67509f2f960SAnna Thomas "Res" + Twine(Idx));
67609f2f960SAnna Thomas }
67709f2f960SAnna Thomas VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
67809f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
67909f2f960SAnna Thomas CI->eraseFromParent();
68009f2f960SAnna Thomas return;
68109f2f960SAnna Thomas }
68209f2f960SAnna Thomas
68309f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
68409f2f960SAnna Thomas // better results on X86 at least.
68509f2f960SAnna Thomas Value *SclrMask;
68609f2f960SAnna Thomas if (VectorWidth != 1) {
68709f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
68809f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
68909f2f960SAnna Thomas }
69009f2f960SAnna Thomas
69109f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
69209f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
69309f2f960SAnna Thomas //
69409f2f960SAnna Thomas // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
69509f2f960SAnna Thomas // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
69609f2f960SAnna Thomas // br i1 %mask_1, label %cond.load, label %else
69709f2f960SAnna Thomas //
69809f2f960SAnna Thomas
69909f2f960SAnna Thomas Value *Predicate;
70009f2f960SAnna Thomas if (VectorWidth != 1) {
7019498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
7029498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
70309f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
70409f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
70509f2f960SAnna Thomas } else {
70609f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
70709f2f960SAnna Thomas }
70809f2f960SAnna Thomas
70909f2f960SAnna Thomas // Create "cond" block
71009f2f960SAnna Thomas //
71109f2f960SAnna Thomas // %EltAddr = getelementptr i32* %1, i32 0
71209f2f960SAnna Thomas // %Elt = load i32* %EltAddr
71309f2f960SAnna Thomas // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
71409f2f960SAnna Thomas //
7152e4bb3f1SRoman Lebedev Instruction *ThenTerm =
71605638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
71705638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
71809f2f960SAnna Thomas
7192e4bb3f1SRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
7202e4bb3f1SRoman Lebedev CondBlock->setName("cond.load");
7212e4bb3f1SRoman Lebedev
7222e4bb3f1SRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
72309f2f960SAnna Thomas LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
72409f2f960SAnna Thomas Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
72509f2f960SAnna Thomas
72609f2f960SAnna Thomas // Move the pointer if there are more blocks to come.
72709f2f960SAnna Thomas Value *NewPtr;
72809f2f960SAnna Thomas if ((Idx + 1) != VectorWidth)
72909f2f960SAnna Thomas NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
73009f2f960SAnna Thomas
73109f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
7322e4bb3f1SRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
7332e4bb3f1SRoman Lebedev NewIfBlock->setName("else");
73409f2f960SAnna Thomas BasicBlock *PrevIfBlock = IfBlock;
73509f2f960SAnna Thomas IfBlock = NewIfBlock;
73609f2f960SAnna Thomas
73709f2f960SAnna Thomas // Create the phi to join the new and previous value.
7382e4bb3f1SRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
73909f2f960SAnna Thomas PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
74009f2f960SAnna Thomas ResultPhi->addIncoming(NewVResult, CondBlock);
74109f2f960SAnna Thomas ResultPhi->addIncoming(VResult, PrevIfBlock);
74209f2f960SAnna Thomas VResult = ResultPhi;
74309f2f960SAnna Thomas
74409f2f960SAnna Thomas // Add a PHI for the pointer if this isn't the last iteration.
74509f2f960SAnna Thomas if ((Idx + 1) != VectorWidth) {
74609f2f960SAnna Thomas PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
74709f2f960SAnna Thomas PtrPhi->addIncoming(NewPtr, CondBlock);
74809f2f960SAnna Thomas PtrPhi->addIncoming(Ptr, PrevIfBlock);
74909f2f960SAnna Thomas Ptr = PtrPhi;
75009f2f960SAnna Thomas }
75109f2f960SAnna Thomas }
75209f2f960SAnna Thomas
75309f2f960SAnna Thomas CI->replaceAllUsesWith(VResult);
75409f2f960SAnna Thomas CI->eraseFromParent();
75509f2f960SAnna Thomas
75609f2f960SAnna Thomas ModifiedDT = true;
75709f2f960SAnna Thomas }
75809f2f960SAnna Thomas
scalarizeMaskedCompressStore(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)7599498315cSMarkus Lavin static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
7609498315cSMarkus Lavin DomTreeUpdater *DTU,
76105638592SRoman Lebedev bool &ModifiedDT) {
76209f2f960SAnna Thomas Value *Src = CI->getArgOperand(0);
76309f2f960SAnna Thomas Value *Ptr = CI->getArgOperand(1);
76409f2f960SAnna Thomas Value *Mask = CI->getArgOperand(2);
76509f2f960SAnna Thomas
76609f2f960SAnna Thomas auto *VecType = cast<FixedVectorType>(Src->getType());
76709f2f960SAnna Thomas
76809f2f960SAnna Thomas IRBuilder<> Builder(CI->getContext());
76909f2f960SAnna Thomas Instruction *InsertPt = CI;
77009f2f960SAnna Thomas BasicBlock *IfBlock = CI->getParent();
77109f2f960SAnna Thomas
77209f2f960SAnna Thomas Builder.SetInsertPoint(InsertPt);
77309f2f960SAnna Thomas Builder.SetCurrentDebugLocation(CI->getDebugLoc());
77409f2f960SAnna Thomas
77509f2f960SAnna Thomas Type *EltTy = VecType->getElementType();
77609f2f960SAnna Thomas
77709f2f960SAnna Thomas unsigned VectorWidth = VecType->getNumElements();
77809f2f960SAnna Thomas
77909f2f960SAnna Thomas // Shorten the way if the mask is a vector of constants.
78009f2f960SAnna Thomas if (isConstantIntVector(Mask)) {
78109f2f960SAnna Thomas unsigned MemIndex = 0;
78209f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
78309f2f960SAnna Thomas if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
78409f2f960SAnna Thomas continue;
78509f2f960SAnna Thomas Value *OneElt =
78609f2f960SAnna Thomas Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
78709f2f960SAnna Thomas Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
78809f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
78909f2f960SAnna Thomas ++MemIndex;
79009f2f960SAnna Thomas }
79109f2f960SAnna Thomas CI->eraseFromParent();
79209f2f960SAnna Thomas return;
79309f2f960SAnna Thomas }
79409f2f960SAnna Thomas
79509f2f960SAnna Thomas // If the mask is not v1i1, use scalar bit test operations. This generates
79609f2f960SAnna Thomas // better results on X86 at least.
79709f2f960SAnna Thomas Value *SclrMask;
79809f2f960SAnna Thomas if (VectorWidth != 1) {
79909f2f960SAnna Thomas Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
80009f2f960SAnna Thomas SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
80109f2f960SAnna Thomas }
80209f2f960SAnna Thomas
80309f2f960SAnna Thomas for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
80409f2f960SAnna Thomas // Fill the "else" block, created in the previous iteration
80509f2f960SAnna Thomas //
80609f2f960SAnna Thomas // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
80709f2f960SAnna Thomas // br i1 %mask_1, label %cond.store, label %else
80809f2f960SAnna Thomas //
80909f2f960SAnna Thomas Value *Predicate;
81009f2f960SAnna Thomas if (VectorWidth != 1) {
8119498315cSMarkus Lavin Value *Mask = Builder.getInt(APInt::getOneBitSet(
8129498315cSMarkus Lavin VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
81309f2f960SAnna Thomas Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
81409f2f960SAnna Thomas Builder.getIntN(VectorWidth, 0));
81509f2f960SAnna Thomas } else {
81609f2f960SAnna Thomas Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
81709f2f960SAnna Thomas }
81809f2f960SAnna Thomas
81909f2f960SAnna Thomas // Create "cond" block
82009f2f960SAnna Thomas //
82109f2f960SAnna Thomas // %OneElt = extractelement <16 x i32> %Src, i32 Idx
82209f2f960SAnna Thomas // %EltAddr = getelementptr i32* %1, i32 0
82309f2f960SAnna Thomas // %store i32 %OneElt, i32* %EltAddr
82409f2f960SAnna Thomas //
825573f7411SRoman Lebedev Instruction *ThenTerm =
82605638592SRoman Lebedev SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
82705638592SRoman Lebedev /*BranchWeights=*/nullptr, DTU);
82809f2f960SAnna Thomas
829573f7411SRoman Lebedev BasicBlock *CondBlock = ThenTerm->getParent();
830573f7411SRoman Lebedev CondBlock->setName("cond.store");
831573f7411SRoman Lebedev
832573f7411SRoman Lebedev Builder.SetInsertPoint(CondBlock->getTerminator());
83309f2f960SAnna Thomas Value *OneElt = Builder.CreateExtractElement(Src, Idx);
83409f2f960SAnna Thomas Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
83509f2f960SAnna Thomas
83609f2f960SAnna Thomas // Move the pointer if there are more blocks to come.
83709f2f960SAnna Thomas Value *NewPtr;
83809f2f960SAnna Thomas if ((Idx + 1) != VectorWidth)
83909f2f960SAnna Thomas NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
84009f2f960SAnna Thomas
84109f2f960SAnna Thomas // Create "else" block, fill it in the next iteration
842573f7411SRoman Lebedev BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
843573f7411SRoman Lebedev NewIfBlock->setName("else");
84409f2f960SAnna Thomas BasicBlock *PrevIfBlock = IfBlock;
84509f2f960SAnna Thomas IfBlock = NewIfBlock;
84609f2f960SAnna Thomas
847573f7411SRoman Lebedev Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
848573f7411SRoman Lebedev
84909f2f960SAnna Thomas // Add a PHI for the pointer if this isn't the last iteration.
85009f2f960SAnna Thomas if ((Idx + 1) != VectorWidth) {
85109f2f960SAnna Thomas PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
85209f2f960SAnna Thomas PtrPhi->addIncoming(NewPtr, CondBlock);
85309f2f960SAnna Thomas PtrPhi->addIncoming(Ptr, PrevIfBlock);
85409f2f960SAnna Thomas Ptr = PtrPhi;
85509f2f960SAnna Thomas }
85609f2f960SAnna Thomas }
85709f2f960SAnna Thomas CI->eraseFromParent();
85809f2f960SAnna Thomas
85909f2f960SAnna Thomas ModifiedDT = true;
86009f2f960SAnna Thomas }
86109f2f960SAnna Thomas
runImpl(Function & F,const TargetTransformInfo & TTI,DominatorTree * DT)86205638592SRoman Lebedev static bool runImpl(Function &F, const TargetTransformInfo &TTI,
86305638592SRoman Lebedev DominatorTree *DT) {
86405638592SRoman Lebedev Optional<DomTreeUpdater> DTU;
86505638592SRoman Lebedev if (DT)
86605638592SRoman Lebedev DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
86705638592SRoman Lebedev
86809f2f960SAnna Thomas bool EverMadeChange = false;
86909f2f960SAnna Thomas bool MadeChange = true;
87029356e32SAnna Thomas auto &DL = F.getParent()->getDataLayout();
87109f2f960SAnna Thomas while (MadeChange) {
87209f2f960SAnna Thomas MadeChange = false;
8738e86c0e4SKazu Hirata for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
87409f2f960SAnna Thomas bool ModifiedDTOnIteration = false;
8758e86c0e4SKazu Hirata MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
876*0916d96dSKazu Hirata DTU ? DTU.getPointer() : nullptr);
87705638592SRoman Lebedev
87809f2f960SAnna Thomas // Restart BB iteration if the dominator tree of the Function was changed
87909f2f960SAnna Thomas if (ModifiedDTOnIteration)
88009f2f960SAnna Thomas break;
88109f2f960SAnna Thomas }
88209f2f960SAnna Thomas
88309f2f960SAnna Thomas EverMadeChange |= MadeChange;
88409f2f960SAnna Thomas }
88509f2f960SAnna Thomas return EverMadeChange;
88609f2f960SAnna Thomas }
88709f2f960SAnna Thomas
runOnFunction(Function & F)88829356e32SAnna Thomas bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
88929356e32SAnna Thomas auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
89005638592SRoman Lebedev DominatorTree *DT = nullptr;
89105638592SRoman Lebedev if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
89205638592SRoman Lebedev DT = &DTWP->getDomTree();
89305638592SRoman Lebedev return runImpl(F, TTI, DT);
89429356e32SAnna Thomas }
89529356e32SAnna Thomas
89629356e32SAnna Thomas PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)89729356e32SAnna Thomas ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
89829356e32SAnna Thomas auto &TTI = AM.getResult<TargetIRAnalysis>(F);
89905638592SRoman Lebedev auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
90005638592SRoman Lebedev if (!runImpl(F, TTI, DT))
90129356e32SAnna Thomas return PreservedAnalyses::all();
90229356e32SAnna Thomas PreservedAnalyses PA;
90329356e32SAnna Thomas PA.preserve<TargetIRAnalysis>();
90405638592SRoman Lebedev PA.preserve<DominatorTreeAnalysis>();
90529356e32SAnna Thomas return PA;
90629356e32SAnna Thomas }
90729356e32SAnna Thomas
optimizeBlock(BasicBlock & BB,bool & ModifiedDT,const TargetTransformInfo & TTI,const DataLayout & DL,DomTreeUpdater * DTU)90809f2f960SAnna Thomas static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
90905638592SRoman Lebedev const TargetTransformInfo &TTI, const DataLayout &DL,
91005638592SRoman Lebedev DomTreeUpdater *DTU) {
91109f2f960SAnna Thomas bool MadeChange = false;
91209f2f960SAnna Thomas
91309f2f960SAnna Thomas BasicBlock::iterator CurInstIterator = BB.begin();
91409f2f960SAnna Thomas while (CurInstIterator != BB.end()) {
91509f2f960SAnna Thomas if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
91605638592SRoman Lebedev MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
91709f2f960SAnna Thomas if (ModifiedDT)
91809f2f960SAnna Thomas return true;
91909f2f960SAnna Thomas }
92009f2f960SAnna Thomas
92109f2f960SAnna Thomas return MadeChange;
92209f2f960SAnna Thomas }
92309f2f960SAnna Thomas
optimizeCallInst(CallInst * CI,bool & ModifiedDT,const TargetTransformInfo & TTI,const DataLayout & DL,DomTreeUpdater * DTU)92409f2f960SAnna Thomas static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
92509f2f960SAnna Thomas const TargetTransformInfo &TTI,
92605638592SRoman Lebedev const DataLayout &DL, DomTreeUpdater *DTU) {
92709f2f960SAnna Thomas IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
92809f2f960SAnna Thomas if (II) {
92909f2f960SAnna Thomas // The scalarization code below does not work for scalable vectors.
93009f2f960SAnna Thomas if (isa<ScalableVectorType>(II->getType()) ||
931f631173dSKazu Hirata any_of(II->args(),
93209f2f960SAnna Thomas [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
93309f2f960SAnna Thomas return false;
93409f2f960SAnna Thomas
93509f2f960SAnna Thomas switch (II->getIntrinsicID()) {
93609f2f960SAnna Thomas default:
93709f2f960SAnna Thomas break;
93809f2f960SAnna Thomas case Intrinsic::masked_load:
93909f2f960SAnna Thomas // Scalarize unsupported vector masked load
94009f2f960SAnna Thomas if (TTI.isLegalMaskedLoad(
94109f2f960SAnna Thomas CI->getType(),
94209f2f960SAnna Thomas cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
94309f2f960SAnna Thomas return false;
9449498315cSMarkus Lavin scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
94509f2f960SAnna Thomas return true;
94609f2f960SAnna Thomas case Intrinsic::masked_store:
94709f2f960SAnna Thomas if (TTI.isLegalMaskedStore(
94809f2f960SAnna Thomas CI->getArgOperand(0)->getType(),
94909f2f960SAnna Thomas cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
95009f2f960SAnna Thomas return false;
9519498315cSMarkus Lavin scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
95209f2f960SAnna Thomas return true;
95309f2f960SAnna Thomas case Intrinsic::masked_gather: {
954066524eaSCraig Topper MaybeAlign MA =
955066524eaSCraig Topper cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
95609f2f960SAnna Thomas Type *LoadTy = CI->getType();
957066524eaSCraig Topper Align Alignment = DL.getValueOrABITypeAlignment(MA,
958066524eaSCraig Topper LoadTy->getScalarType());
959552eb372SRosie Sumpter if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
960552eb372SRosie Sumpter !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
96109f2f960SAnna Thomas return false;
9629498315cSMarkus Lavin scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
96309f2f960SAnna Thomas return true;
96409f2f960SAnna Thomas }
96509f2f960SAnna Thomas case Intrinsic::masked_scatter: {
966066524eaSCraig Topper MaybeAlign MA =
967066524eaSCraig Topper cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
96809f2f960SAnna Thomas Type *StoreTy = CI->getArgOperand(0)->getType();
969066524eaSCraig Topper Align Alignment = DL.getValueOrABITypeAlignment(MA,
970066524eaSCraig Topper StoreTy->getScalarType());
971552eb372SRosie Sumpter if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
972552eb372SRosie Sumpter !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
973552eb372SRosie Sumpter Alignment))
97409f2f960SAnna Thomas return false;
9759498315cSMarkus Lavin scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
97609f2f960SAnna Thomas return true;
97709f2f960SAnna Thomas }
97809f2f960SAnna Thomas case Intrinsic::masked_expandload:
97909f2f960SAnna Thomas if (TTI.isLegalMaskedExpandLoad(CI->getType()))
98009f2f960SAnna Thomas return false;
9819498315cSMarkus Lavin scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
98209f2f960SAnna Thomas return true;
98309f2f960SAnna Thomas case Intrinsic::masked_compressstore:
98409f2f960SAnna Thomas if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
98509f2f960SAnna Thomas return false;
9869498315cSMarkus Lavin scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
98709f2f960SAnna Thomas return true;
98809f2f960SAnna Thomas }
98909f2f960SAnna Thomas }
99009f2f960SAnna Thomas
99109f2f960SAnna Thomas return false;
99209f2f960SAnna Thomas }
993