1a65e2f73SChris Lattner //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2a65e2f73SChris Lattner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a65e2f73SChris Lattner //
7a65e2f73SChris Lattner //===----------------------------------------------------------------------===//
8a65e2f73SChris Lattner //
9a65e2f73SChris Lattner // This file implements the visit functions for load, store and alloca.
10a65e2f73SChris Lattner //
11a65e2f73SChris Lattner //===----------------------------------------------------------------------===//
12a65e2f73SChris Lattner
13a9174582SChandler Carruth #include "InstCombineInternal.h"
14ba01ed00SYaxun Liu #include "llvm/ADT/MapVector.h"
15ec6b1fcfSNAKAMURA Takumi #include "llvm/ADT/SmallString.h"
16ed0881b2SChandler Carruth #include "llvm/ADT/Statistic.h"
176c6adde8SSimon Pilgrim #include "llvm/Analysis/AliasAnalysis.h"
18826bdf8cSDan Gohman #include "llvm/Analysis/Loads.h"
199fb823bbSChandler Carruth #include "llvm/IR/DataLayout.h"
20238533ecSVedant Kumar #include "llvm/IR/DebugInfoMetadata.h"
219fb823bbSChandler Carruth #include "llvm/IR/IntrinsicInst.h"
22ba01ed00SYaxun Liu #include "llvm/IR/LLVMContext.h"
23ec95c6ccSAlexey Bataev #include "llvm/IR/PatternMatch.h"
242a6c8715SSebastian Neubauer #include "llvm/Transforms/InstCombine/InstCombiner.h"
256c6adde8SSimon Pilgrim #include "llvm/Transforms/Utils/Local.h"
26a65e2f73SChris Lattner using namespace llvm;
27ec95c6ccSAlexey Bataev using namespace PatternMatch;
28a65e2f73SChris Lattner
29964daaafSChandler Carruth #define DEBUG_TYPE "instcombine"
30964daaafSChandler Carruth
31a65e2f73SChris Lattner STATISTIC(NumDeadStore, "Number of dead stores eliminated");
32c908ca17SChandler Carruth STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
33c908ca17SChandler Carruth
34c908ca17SChandler Carruth /// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
35c908ca17SChandler Carruth /// pointer to an alloca. Ignore any reads of the pointer, return false if we
36c908ca17SChandler Carruth /// see any stores or other unknown uses. If we see pointer arithmetic, keep
37c908ca17SChandler Carruth /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
38c908ca17SChandler Carruth /// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
39c908ca17SChandler Carruth /// the alloca, and if the source pointer points to constant memory, we can
40c908ca17SChandler Carruth /// optimize this.
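///
/// A minimal illustration of the accepted pattern (hypothetical IR, names
/// invented for this sketch):
///
///   %buf = alloca [8 x i32]
///   %dst = bitcast [8 x i32]* %buf to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst,
///            i8* bitcast ([8 x i32]* @cst to i8*), i64 32, i1 false)
///   %p = getelementptr inbounds [8 x i32], [8 x i32]* %buf, i64 0, i64 0
///   %v = load i32, i32* %p            ; plain reads are fine
///
/// Any store through the pointer, a volatile access, or a second copy into
/// the alloca makes the walk return false.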
41c908ca17SChandler Carruth static bool
426c6adde8SSimon Pilgrim isOnlyCopiedFromConstantMemory(AAResults *AA,
4316295d52SMatt Arsenault Value *V, MemTransferInst *&TheCopy,
44813dab2fSReid Kleckner SmallVectorImpl<Instruction *> &ToDelete) {
45c908ca17SChandler Carruth // We track lifetime intrinsics as we encounter them. If we decide to go
46c908ca17SChandler Carruth // ahead and replace the value with the global, this lets the caller quickly
47c908ca17SChandler Carruth // eliminate the markers.
48c908ca17SChandler Carruth
49813dab2fSReid Kleckner SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
500a16c228SDavid Majnemer ValuesToInspect.emplace_back(V, false);
51813dab2fSReid Kleckner while (!ValuesToInspect.empty()) {
52813dab2fSReid Kleckner auto ValuePair = ValuesToInspect.pop_back_val();
53813dab2fSReid Kleckner const bool IsOffset = ValuePair.second;
54813dab2fSReid Kleckner for (auto &U : ValuePair.first->uses()) {
550a16c228SDavid Majnemer auto *I = cast<Instruction>(U.getUser());
56c908ca17SChandler Carruth
570a16c228SDavid Majnemer if (auto *LI = dyn_cast<LoadInst>(I)) {
58c908ca17SChandler Carruth // Ignore non-volatile loads, they are always ok.
59c908ca17SChandler Carruth if (!LI->isSimple()) return false;
60c908ca17SChandler Carruth continue;
61c908ca17SChandler Carruth }
62c908ca17SChandler Carruth
6360728177SMatt Arsenault if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
64c908ca17SChandler Carruth // If uses of the bitcast are ok, we are ok.
650a16c228SDavid Majnemer ValuesToInspect.emplace_back(I, IsOffset);
66c908ca17SChandler Carruth continue;
67c908ca17SChandler Carruth }
680a16c228SDavid Majnemer if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
69c908ca17SChandler Carruth // If the GEP has all zero indices, it doesn't offset the pointer;
70c908ca17SChandler Carruth // otherwise it does.
710a16c228SDavid Majnemer ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
72c908ca17SChandler Carruth continue;
73c908ca17SChandler Carruth }
74c908ca17SChandler Carruth
75c1892ec1SCraig Topper if (auto *Call = dyn_cast<CallBase>(I)) {
76c908ca17SChandler Carruth // If this is the function being called then we treat it like a load and
77c908ca17SChandler Carruth // ignore it.
78c1892ec1SCraig Topper if (Call->isCallee(&U))
79c908ca17SChandler Carruth continue;
80c908ca17SChandler Carruth
81c1892ec1SCraig Topper unsigned DataOpNo = Call->getDataOperandNo(&U);
82c1892ec1SCraig Topper bool IsArgOperand = Call->isArgOperand(&U);
8302f4787eSDavid Majnemer
8426af2caeSReid Kleckner // Inalloca arguments are clobbered by the call.
85c1892ec1SCraig Topper if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
8626af2caeSReid Kleckner return false;
8726af2caeSReid Kleckner
88c908ca17SChandler Carruth // If this is a readonly/readnone call site, then we know it is just a
89c908ca17SChandler Carruth // load (but one that potentially returns the value itself), so we can
90c908ca17SChandler Carruth // ignore it if we know that the value isn't captured.
91c1892ec1SCraig Topper if (Call->onlyReadsMemory() &&
92c1892ec1SCraig Topper (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
93c908ca17SChandler Carruth continue;
94c908ca17SChandler Carruth
95c908ca17SChandler Carruth // If this is being passed as a byval argument, the caller is making a
96c908ca17SChandler Carruth // copy, so it is only a read of the alloca.
97c1892ec1SCraig Topper if (IsArgOperand && Call->isByValArgument(DataOpNo))
98c908ca17SChandler Carruth continue;
99c908ca17SChandler Carruth }
100c908ca17SChandler Carruth
101c908ca17SChandler Carruth // Lifetime intrinsics can be handled by the caller.
102b264d69dSVedant Kumar if (I->isLifetimeStartOrEnd()) {
103b264d69dSVedant Kumar assert(I->use_empty() && "Lifetime markers have no result to use!");
104b264d69dSVedant Kumar ToDelete.push_back(I);
105c908ca17SChandler Carruth continue;
106c908ca17SChandler Carruth }
107c908ca17SChandler Carruth
108c908ca17SChandler Carruth // If this isn't our memcpy/memmove, reject it as something we can't
109c908ca17SChandler Carruth // handle.
110cdf47884SChandler Carruth MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
111f40110f4SCraig Topper if (!MI)
112c908ca17SChandler Carruth return false;
113c908ca17SChandler Carruth
114c908ca17SChandler Carruth // If the transfer is using the alloca as its source, then ignore it
115c908ca17SChandler Carruth // since it is only a read (unless the transfer is volatile).
116cdf47884SChandler Carruth if (U.getOperandNo() == 1) {
117c908ca17SChandler Carruth if (MI->isVolatile()) return false;
118c908ca17SChandler Carruth continue;
119c908ca17SChandler Carruth }
120c908ca17SChandler Carruth
121c908ca17SChandler Carruth // If we already have seen a copy, reject the second one.
122c908ca17SChandler Carruth if (TheCopy) return false;
123c908ca17SChandler Carruth
124c908ca17SChandler Carruth // If the pointer has been offset from the start of the alloca, we can't
125c908ca17SChandler Carruth // safely handle this.
126c908ca17SChandler Carruth if (IsOffset) return false;
127c908ca17SChandler Carruth
128c908ca17SChandler Carruth // If the memintrinsic isn't using the alloca as the dest, reject it.
129cdf47884SChandler Carruth if (U.getOperandNo() != 0) return false;
130c908ca17SChandler Carruth
131c908ca17SChandler Carruth // If the source of the memcpy/move is not constant memory, reject it.
13216295d52SMatt Arsenault if (!AA->pointsToConstantMemory(MI->getSource()))
133c908ca17SChandler Carruth return false;
134c908ca17SChandler Carruth
135c908ca17SChandler Carruth // Otherwise, the transform is safe. Remember the copy instruction.
136c908ca17SChandler Carruth TheCopy = MI;
137c908ca17SChandler Carruth }
138813dab2fSReid Kleckner }
139c908ca17SChandler Carruth return true;
140c908ca17SChandler Carruth }
141c908ca17SChandler Carruth
142c908ca17SChandler Carruth /// isOnlyCopiedFromConstantMemory - Return the copy instruction if the given
143c908ca17SChandler Carruth /// alloca is only modified by a copy from constant memory; if we can prove
144c908ca17SChandler Carruth /// this, any use of the alloca can be replaced with a use of the copy source.
145c908ca17SChandler Carruth static MemTransferInst *
1466c6adde8SSimon Pilgrim isOnlyCopiedFromConstantMemory(AAResults *AA,
14716295d52SMatt Arsenault AllocaInst *AI,
148c908ca17SChandler Carruth SmallVectorImpl<Instruction *> &ToDelete) {
149f40110f4SCraig Topper MemTransferInst *TheCopy = nullptr;
15016295d52SMatt Arsenault if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
151c908ca17SChandler Carruth return TheCopy;
152f40110f4SCraig Topper return nullptr;
153c908ca17SChandler Carruth }
154c908ca17SChandler Carruth
155df19ad45SVitaly Buka /// Returns true if V is dereferenceable for size of alloca.
156df19ad45SVitaly Buka static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
157df19ad45SVitaly Buka const DataLayout &DL) {
158df19ad45SVitaly Buka if (AI->isArrayAllocation())
159df19ad45SVitaly Buka return false;
160df19ad45SVitaly Buka uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
161df19ad45SVitaly Buka if (!AllocaSize)
162df19ad45SVitaly Buka return false;
1631172712fSArthur Eubanks return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
164df19ad45SVitaly Buka APInt(64, AllocaSize), DL);
165df19ad45SVitaly Buka }
166df19ad45SVitaly Buka
1672a6c8715SSebastian Neubauer static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
1682a6c8715SSebastian Neubauer AllocaInst &AI) {
169720762e2SDuncan P. N. Exon Smith // Check for array size of 1 (scalar allocation).
170be95b4afSDuncan P. N. Exon Smith if (!AI.isArrayAllocation()) {
171be95b4afSDuncan P. N. Exon Smith // i32 1 is the canonical array size for scalar allocations.
172be95b4afSDuncan P. N. Exon Smith if (AI.getArraySize()->getType()->isIntegerTy(32))
173720762e2SDuncan P. N. Exon Smith return nullptr;
174720762e2SDuncan P. N. Exon Smith
175be95b4afSDuncan P. N. Exon Smith // Canonicalize it.
176878cb38aSNikita Popov return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
177be95b4afSDuncan P. N. Exon Smith }
178be95b4afSDuncan P. N. Exon Smith
179a65e2f73SChris Lattner // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
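// A rough sketch of the rewrite (names are illustrative; the real names come
// from the original alloca):
//   %a = alloca i32, i32 4
// becomes
//   %a1 = alloca [4 x i32]
//   %a1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a1, i64 0, i64 0
// and every use of %a is redirected to %a1.sub.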
180a65e2f73SChris Lattner if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
18182edf8d3SSimon Pilgrim if (C->getValue().getActiveBits() <= 64) {
182bb730135SDuncan P. N. Exon Smith Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
18328623796SMatt Arsenault AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
18428623796SMatt Arsenault nullptr, AI.getName());
1854f04db4bSEli Friedman New->setAlignment(AI.getAlign());
186a65e2f73SChris Lattner
187a65e2f73SChris Lattner // Scan to the end of the allocation instructions, to skip over a block of
188a65e2f73SChris Lattner // allocas if possible...also skip interleaved debug info
189a65e2f73SChris Lattner //
1909f8aaf21SDuncan P. N. Exon Smith BasicBlock::iterator It(New);
191bb730135SDuncan P. N. Exon Smith while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
192bb730135SDuncan P. N. Exon Smith ++It;
193a65e2f73SChris Lattner
194a65e2f73SChris Lattner // Now that It is pointing to the first non-allocation-inst in the block,
195a65e2f73SChris Lattner // insert our getelementptr instruction...
196a65e2f73SChris Lattner //
197c6820ec1SDuncan P. N. Exon Smith Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
1989e3a6ca6SMatt Arsenault Value *NullIdx = Constant::getNullValue(IdxTy);
199640ff9dbSMatt Arsenault Value *Idx[2] = {NullIdx, NullIdx};
20028623796SMatt Arsenault Instruction *GEP = GetElementPtrInst::CreateInBounds(
2017716075aSJames Y Knight NewTy, New, Idx, New->getName() + ".sub");
20228623796SMatt Arsenault IC.InsertNewInstBefore(GEP, *It);
203a65e2f73SChris Lattner
204a65e2f73SChris Lattner // Now make everything use the getelementptr instead of the original
205a65e2f73SChris Lattner // allocation.
20628623796SMatt Arsenault return IC.replaceInstUsesWith(AI, GEP);
207bb730135SDuncan P. N. Exon Smith }
20882edf8d3SSimon Pilgrim }
209bb730135SDuncan P. N. Exon Smith
210bb730135SDuncan P. N. Exon Smith if (isa<UndefValue>(AI.getArraySize()))
2114b198802SSanjay Patel return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
212a65e2f73SChris Lattner
21307ff9b03SDuncan P. N. Exon Smith // Ensure that the alloca array size argument has type intptr_t, so that
21407ff9b03SDuncan P. N. Exon Smith // any casting is exposed early.
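// For example, on a target with 64-bit pointers (illustrative names):
//   %a = alloca i32, i16 %n
// becomes
//   %n.zext = zext i16 %n to i64
//   %a = alloca i32, i64 %n.zext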
21507ff9b03SDuncan P. N. Exon Smith Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
21607ff9b03SDuncan P. N. Exon Smith if (AI.getArraySize()->getType() != IntPtrTy) {
217bb4069e4SCraig Topper Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
218878cb38aSNikita Popov return IC.replaceOperand(AI, 0, V);
21907ff9b03SDuncan P. N. Exon Smith }
22007ff9b03SDuncan P. N. Exon Smith
221c6820ec1SDuncan P. N. Exon Smith return nullptr;
222c6820ec1SDuncan P. N. Exon Smith }
223c6820ec1SDuncan P. N. Exon Smith
22403ab8a36SBenjamin Kramer namespace {
225ba01ed00SYaxun Liu // If I and V are pointers in different address spaces, it is not allowed to
226ba01ed00SYaxun Liu // use replaceAllUsesWith since I and V have different types. A
227ba01ed00SYaxun Liu // non-target-specific transformation should not use addrspacecast on V since
228ba01ed00SYaxun Liu // the two address spaces may be disjoint depending on the target.
229ba01ed00SYaxun Liu //
230ba01ed00SYaxun Liu // This class chases down uses of the old pointer until reaching the load
231ba01ed00SYaxun Liu // instructions, then replaces the old pointer in the load instructions with
232ba01ed00SYaxun Liu // the new pointer. If during the chasing it sees bitcast or GEP, it will
233ba01ed00SYaxun Liu // create new bitcast or GEP with the new pointer and use them in the load
234ba01ed00SYaxun Liu // instruction.
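//
// A small sketch of the kind of situation this handles (the address space
// numbers are illustrative only):
//
//   %a = alloca [4 x i32], addrspace(5)     ; only initialized by a copy from
//                                           ; an addrspace(4) constant
//   %p = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %a, i64 0, i64 1
//   %v = load i32, i32 addrspace(5)* %p
//
// Instead of addrspacecast'ing the constant, the GEP and the load are
// recreated against the addrspace(4) pointer, leaving the old chain dead.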
235ba01ed00SYaxun Liu class PointerReplacer {
236ba01ed00SYaxun Liu public:
2372a6c8715SSebastian Neubauer PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
2386da31fa4SMatt Arsenault
2396da31fa4SMatt Arsenault bool collectUsers(Instruction &I);
240ba01ed00SYaxun Liu void replacePointer(Instruction &I, Value *V);
241ba01ed00SYaxun Liu
242ba01ed00SYaxun Liu private:
243ba01ed00SYaxun Liu void replace(Instruction *I);
244ba01ed00SYaxun Liu Value *getReplacement(Value *I);
245ba01ed00SYaxun Liu
2466da31fa4SMatt Arsenault SmallSetVector<Instruction *, 4> Worklist;
247ba01ed00SYaxun Liu MapVector<Value *, Value *> WorkMap;
2482a6c8715SSebastian Neubauer InstCombinerImpl &IC;
249ba01ed00SYaxun Liu };
25003ab8a36SBenjamin Kramer } // end anonymous namespace
251ba01ed00SYaxun Liu
2526da31fa4SMatt Arsenault bool PointerReplacer::collectUsers(Instruction &I) {
253ba01ed00SYaxun Liu for (auto U : I.users()) {
254b4d945baSAndy Kaylor auto *Inst = cast<Instruction>(&*U);
255b4d945baSAndy Kaylor if (auto *Load = dyn_cast<LoadInst>(Inst)) {
2566da31fa4SMatt Arsenault if (Load->isVolatile())
2576da31fa4SMatt Arsenault return false;
2586da31fa4SMatt Arsenault Worklist.insert(Load);
259ba01ed00SYaxun Liu } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
2606da31fa4SMatt Arsenault Worklist.insert(Inst);
2616da31fa4SMatt Arsenault if (!collectUsers(*Inst))
2626da31fa4SMatt Arsenault return false;
263b4d945baSAndy Kaylor } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
264b4d945baSAndy Kaylor if (MI->isVolatile())
265b4d945baSAndy Kaylor return false;
2666da31fa4SMatt Arsenault Worklist.insert(Inst);
267b373b599SAndy Kaylor } else if (Inst->isLifetimeStartOrEnd()) {
268b373b599SAndy Kaylor continue;
269ba01ed00SYaxun Liu } else {
2706da31fa4SMatt Arsenault LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
2716da31fa4SMatt Arsenault return false;
272ba01ed00SYaxun Liu }
273ba01ed00SYaxun Liu }
2746da31fa4SMatt Arsenault
2756da31fa4SMatt Arsenault return true;
276ba01ed00SYaxun Liu }
277ba01ed00SYaxun Liu
278789d2506SKazu Hirata Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
279ba01ed00SYaxun Liu
280ba01ed00SYaxun Liu void PointerReplacer::replace(Instruction *I) {
281ba01ed00SYaxun Liu if (getReplacement(I))
282ba01ed00SYaxun Liu return;
283ba01ed00SYaxun Liu
284ba01ed00SYaxun Liu if (auto *LT = dyn_cast<LoadInst>(I)) {
285ba01ed00SYaxun Liu auto *V = getReplacement(LT->getPointerOperand());
286ba01ed00SYaxun Liu assert(V && "Operand not replaced");
2876a9484f4SMatt Arsenault auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
2886a9484f4SMatt Arsenault LT->getAlign(), LT->getOrdering(),
2896a9484f4SMatt Arsenault LT->getSyncScopeID());
290ba01ed00SYaxun Liu NewI->takeName(LT);
2916a9484f4SMatt Arsenault copyMetadataForLoad(*NewI, *LT);
2926a9484f4SMatt Arsenault
293ba01ed00SYaxun Liu IC.InsertNewInstWith(NewI, *LT);
294ba01ed00SYaxun Liu IC.replaceInstUsesWith(*LT, NewI);
295ba01ed00SYaxun Liu WorkMap[LT] = NewI;
296ba01ed00SYaxun Liu } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
297ba01ed00SYaxun Liu auto *V = getReplacement(GEP->getPointerOperand());
298ba01ed00SYaxun Liu assert(V && "Operand not replaced");
299ba01ed00SYaxun Liu SmallVector<Value *, 8> Indices;
300ba01ed00SYaxun Liu Indices.append(GEP->idx_begin(), GEP->idx_end());
301d839afe3SNikita Popov auto *NewI =
302d839afe3SNikita Popov GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
303ba01ed00SYaxun Liu IC.InsertNewInstWith(NewI, *GEP);
304ba01ed00SYaxun Liu NewI->takeName(GEP);
305ba01ed00SYaxun Liu WorkMap[GEP] = NewI;
306ba01ed00SYaxun Liu } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
307ba01ed00SYaxun Liu auto *V = getReplacement(BC->getOperand(0));
308ba01ed00SYaxun Liu assert(V && "Operand not replaced");
309d839afe3SNikita Popov auto *NewT = PointerType::getWithSamePointeeType(
310d839afe3SNikita Popov cast<PointerType>(BC->getType()),
311ba01ed00SYaxun Liu V->getType()->getPointerAddressSpace());
312ba01ed00SYaxun Liu auto *NewI = new BitCastInst(V, NewT);
313ba01ed00SYaxun Liu IC.InsertNewInstWith(NewI, *BC);
314ba01ed00SYaxun Liu NewI->takeName(BC);
315e6d1ce59SYaxun Liu WorkMap[BC] = NewI;
3166da31fa4SMatt Arsenault } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
3176da31fa4SMatt Arsenault auto *SrcV = getReplacement(MemCpy->getRawSource());
3186da31fa4SMatt Arsenault // The pointer may appear in the destination of a copy, but we don't want to
3196da31fa4SMatt Arsenault // replace it.
3206da31fa4SMatt Arsenault if (!SrcV) {
3216da31fa4SMatt Arsenault assert(getReplacement(MemCpy->getRawDest()) &&
3226da31fa4SMatt Arsenault "destination not in replace list");
3236da31fa4SMatt Arsenault return;
3246da31fa4SMatt Arsenault }
3256da31fa4SMatt Arsenault
3266da31fa4SMatt Arsenault IC.Builder.SetInsertPoint(MemCpy);
3276da31fa4SMatt Arsenault auto *NewI = IC.Builder.CreateMemTransferInst(
3286da31fa4SMatt Arsenault MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
3296da31fa4SMatt Arsenault SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
3306da31fa4SMatt Arsenault MemCpy->isVolatile());
3310fc624f0SNikita Popov AAMDNodes AAMD = MemCpy->getAAMetadata();
3326da31fa4SMatt Arsenault if (AAMD)
3336da31fa4SMatt Arsenault NewI->setAAMetadata(AAMD);
3346da31fa4SMatt Arsenault
3356da31fa4SMatt Arsenault IC.eraseInstFromFunction(*MemCpy);
3366da31fa4SMatt Arsenault WorkMap[MemCpy] = NewI;
337ba01ed00SYaxun Liu } else {
338ba01ed00SYaxun Liu llvm_unreachable("should never reach here");
339ba01ed00SYaxun Liu }
340ba01ed00SYaxun Liu }
341ba01ed00SYaxun Liu
342ba01ed00SYaxun Liu void PointerReplacer::replacePointer(Instruction &I, Value *V) {
343684c87beSBenjamin Kramer #ifndef NDEBUG
344ba01ed00SYaxun Liu auto *PT = cast<PointerType>(I.getType());
345ba01ed00SYaxun Liu auto *NT = cast<PointerType>(V->getType());
346d839afe3SNikita Popov assert(PT != NT && PT->hasSameElementTypeAs(NT) && "Invalid usage");
347684c87beSBenjamin Kramer #endif
348ba01ed00SYaxun Liu WorkMap[&I] = V;
3496da31fa4SMatt Arsenault
3506da31fa4SMatt Arsenault for (Instruction *Workitem : Worklist)
3516da31fa4SMatt Arsenault replace(Workitem);
352ba01ed00SYaxun Liu }
353ba01ed00SYaxun Liu
3542a6c8715SSebastian Neubauer Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
355c6820ec1SDuncan P. N. Exon Smith if (auto *I = simplifyAllocaArraySize(*this, AI))
356c6820ec1SDuncan P. N. Exon Smith return I;
357c6820ec1SDuncan P. N. Exon Smith
358a28d91d8SMehdi Amini if (AI.getAllocatedType()->isSized()) {
3598bc764aeSDuncan Sands // Move all allocas of zero-byte objects to the entry block and merge them
3608bc764aeSDuncan Sands // together. Note that we only do this for allocas, because malloc should
3618bc764aeSDuncan Sands // allocate and return a unique pointer, even for a zero-byte allocation.
3622ea54957SHuihui Zhang if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinSize() == 0) {
3638bc764aeSDuncan Sands // For a zero sized alloca there is no point in doing an array allocation.
3648bc764aeSDuncan Sands // This is helpful if the array size is a complicated expression not used
3658bc764aeSDuncan Sands // elsewhere.
366878cb38aSNikita Popov if (AI.isArrayAllocation())
367878cb38aSNikita Popov return replaceOperand(AI, 0,
368878cb38aSNikita Popov ConstantInt::get(AI.getArraySize()->getType(), 1));
3698bc764aeSDuncan Sands
3708bc764aeSDuncan Sands // Get the first instruction in the entry block.
3718bc764aeSDuncan Sands BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
3728bc764aeSDuncan Sands Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
3738bc764aeSDuncan Sands if (FirstInst != &AI) {
3748bc764aeSDuncan Sands // If the entry block doesn't start with a zero-size alloca then move
3758bc764aeSDuncan Sands // this one to the start of the entry block. There is no problem with
3768bc764aeSDuncan Sands // dominance as the array size was forced to a constant earlier already.
3778bc764aeSDuncan Sands AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
3788bc764aeSDuncan Sands if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
3792ea54957SHuihui Zhang DL.getTypeAllocSize(EntryAI->getAllocatedType())
3802ea54957SHuihui Zhang .getKnownMinSize() != 0) {
3818bc764aeSDuncan Sands AI.moveBefore(FirstInst);
3828bc764aeSDuncan Sands return &AI;
3838bc764aeSDuncan Sands }
3848bc764aeSDuncan Sands
3858bc764aeSDuncan Sands // Replace this zero-sized alloca with the one at the start of the entry
3868bc764aeSDuncan Sands // block after ensuring that the address will be aligned enough for both
3878bc764aeSDuncan Sands // types.
3884f04db4bSEli Friedman const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
3898bc764aeSDuncan Sands EntryAI->setAlignment(MaxAlign);
3908bc764aeSDuncan Sands if (AI.getType() != EntryAI->getType())
3918bc764aeSDuncan Sands return new BitCastInst(EntryAI, AI.getType());
3924b198802SSanjay Patel return replaceInstUsesWith(AI, EntryAI);
3938bc764aeSDuncan Sands }
3948bc764aeSDuncan Sands }
395a65e2f73SChris Lattner }
396a65e2f73SChris Lattner
397c908ca17SChandler Carruth // Check to see if this allocation is only modified by a memcpy/memmove from
39816295d52SMatt Arsenault // a constant whose alignment is equal to or exceeds that of the allocation.
39916295d52SMatt Arsenault // If this is the case, we can change all users to use the constant global
40016295d52SMatt Arsenault // instead. This is commonly produced by the CFE by constructs like "void
40116295d52SMatt Arsenault // foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' is only subsequently
40216295d52SMatt Arsenault // read.
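// A rough before/after sketch (illustrative IR):
//   %A = alloca [9 x i32]
//   call void @llvm.memcpy...(... %A ..., ... @f.init ..., i64 36, i1 false)
//   %v = load i32, i32* %elt
// All users of %A are rewritten to read @f.init directly and the memcpy
// (plus any lifetime markers) is removed, assuming @f.init is at least as
// aligned as the alloca and dereferenceable for its full size.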
403c908ca17SChandler Carruth SmallVector<Instruction *, 4> ToDelete;
40416295d52SMatt Arsenault if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
4056da31fa4SMatt Arsenault Value *TheSrc = Copy->getSource();
406a4953db5SNikita Popov Align AllocaAlign = AI.getAlign();
40768b2e507SCraig Topper Align SourceAlign = getOrEnforceKnownAlignment(
4086da31fa4SMatt Arsenault TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
409a4953db5SNikita Popov if (AllocaAlign <= SourceAlign &&
4102fea5d5dSRoman Lebedev isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
411cf416167SBenjamin Kramer !isa<Instruction>(TheSrc)) {
412cf416167SBenjamin Kramer // FIXME: Can we sink instructions without violating dominance when TheSrc
413cf416167SBenjamin Kramer // is an instruction instead of a constant or argument?
414d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
415d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
4166da31fa4SMatt Arsenault unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
4176da31fa4SMatt Arsenault auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
4186da31fa4SMatt Arsenault if (AI.getType()->getAddressSpace() == SrcAddrSpace) {
4196da31fa4SMatt Arsenault for (Instruction *Delete : ToDelete)
4206da31fa4SMatt Arsenault eraseInstFromFunction(*Delete);
4216da31fa4SMatt Arsenault
4226da31fa4SMatt Arsenault Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
4234b198802SSanjay Patel Instruction *NewI = replaceInstUsesWith(AI, Cast);
4244b198802SSanjay Patel eraseInstFromFunction(*Copy);
425c908ca17SChandler Carruth ++NumGlobalCopies;
426c908ca17SChandler Carruth return NewI;
42759bc99a0SMatt Arsenault }
42859bc99a0SMatt Arsenault
429ba01ed00SYaxun Liu PointerReplacer PtrReplacer(*this);
4306da31fa4SMatt Arsenault if (PtrReplacer.collectUsers(AI)) {
4316da31fa4SMatt Arsenault for (Instruction *Delete : ToDelete)
4326da31fa4SMatt Arsenault eraseInstFromFunction(*Delete);
4336da31fa4SMatt Arsenault
4346da31fa4SMatt Arsenault Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
435ba01ed00SYaxun Liu PtrReplacer.replacePointer(AI, Cast);
436ba01ed00SYaxun Liu ++NumGlobalCopies;
437ba01ed00SYaxun Liu }
438c908ca17SChandler Carruth }
4396da31fa4SMatt Arsenault }
440c908ca17SChandler Carruth
44195cc4f3cSNuno Lopes // At last, use the generic allocation site handler to aggressively remove
44295cc4f3cSNuno Lopes // unused allocas.
44395cc4f3cSNuno Lopes return visitAllocSite(AI);
444a65e2f73SChris Lattner }
445a65e2f73SChris Lattner
44689e92d21SPhilip Reames // Are we allowed to form an atomic load or store of this type?
44789e92d21SPhilip Reames static bool isSupportedAtomicType(Type *Ty) {
448b3091da3SVedant Kumar return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
44989e92d21SPhilip Reames }
45089e92d21SPhilip Reames
4515f8f34e4SAdrian Prantl /// Helper to combine a load to a new type.
452bc6378deSChandler Carruth ///
453bc6378deSChandler Carruth /// This just does the work of combining a load to a new type. It handles
454bc6378deSChandler Carruth /// metadata, etc., and returns the new instruction. The \c NewTy should be the
455bc6378deSChandler Carruth /// loaded *value* type. This will convert it to a pointer, cast the operand to
456bc6378deSChandler Carruth /// that pointer type, load it, etc.
457bc6378deSChandler Carruth ///
458bc6378deSChandler Carruth /// Note that this will create all of the instructions with whatever insert
4592a6c8715SSebastian Neubauer /// point the \c InstCombinerImpl currently is using.
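///
/// As a hedged illustration (names invented): combining
///   %v = load i32, i32* %p
/// to type float yields
///   %0 = bitcast i32* %p to float*
///   %v.suffix = load float, float* %0
/// with the alignment, atomic ordering and load metadata copied over.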
4602a6c8715SSebastian Neubauer LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
461b4dd928fSNikita Popov const Twine &Suffix) {
46289e92d21SPhilip Reames assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
46389e92d21SPhilip Reames "can't fold an atomic load to requested type");
46489e92d21SPhilip Reames
465bc6378deSChandler Carruth Value *Ptr = LI.getPointerOperand();
466bc6378deSChandler Carruth unsigned AS = LI.getPointerAddressSpace();
4677bb7fa12SNikita Popov Type *NewPtrTy = NewTy->getPointerTo(AS);
4687c9ad0dbSAlexey Bataev Value *NewPtr = nullptr;
4697c9ad0dbSAlexey Bataev if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
4707bb7fa12SNikita Popov NewPtr->getType() == NewPtrTy))
4717bb7fa12SNikita Popov NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy);
4727c9ad0dbSAlexey Bataev
473b4dd928fSNikita Popov LoadInst *NewLoad = Builder.CreateAlignedLoad(
474604f4497SNikita Popov NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix);
475bb80d3e1SKonstantin Zhuravlyov NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
47686e9f9dcSSanjay Patel copyMetadataForLoad(*NewLoad, LI);
477bc6378deSChandler Carruth return NewLoad;
478bc6378deSChandler Carruth }
479bc6378deSChandler Carruth
4805f8f34e4SAdrian Prantl /// Combine a store to a new type.
481fa11d837SChandler Carruth ///
482fa11d837SChandler Carruth /// Returns the newly created store instruction.
4832a6c8715SSebastian Neubauer static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
4842a6c8715SSebastian Neubauer Value *V) {
48589e92d21SPhilip Reames assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
48689e92d21SPhilip Reames "can't fold an atomic store of requested type");
48789e92d21SPhilip Reames
488fa11d837SChandler Carruth Value *Ptr = SI.getPointerOperand();
489fa11d837SChandler Carruth unsigned AS = SI.getPointerAddressSpace();
490fa11d837SChandler Carruth SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
491fa11d837SChandler Carruth SI.getAllMetadata(MD);
492fa11d837SChandler Carruth
493bb4069e4SCraig Topper StoreInst *NewStore = IC.Builder.CreateAlignedStore(
494bb4069e4SCraig Topper V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
49559f95222SGuillaume Chatelet SI.getAlign(), SI.isVolatile());
496bb80d3e1SKonstantin Zhuravlyov NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
497fa11d837SChandler Carruth for (const auto &MDPair : MD) {
498fa11d837SChandler Carruth unsigned ID = MDPair.first;
499fa11d837SChandler Carruth MDNode *N = MDPair.second;
500fa11d837SChandler Carruth // Note, essentially every kind of metadata should be preserved here! This
501fa11d837SChandler Carruth // routine is supposed to clone a store instruction changing *only its
502fa11d837SChandler Carruth // type*. The only metadata it makes sense to drop is metadata which is
503fa11d837SChandler Carruth // invalidated when the pointer type changes. This should essentially
504fa11d837SChandler Carruth // never be the case in LLVM, but we explicitly switch over only known
505fa11d837SChandler Carruth // metadata to be conservatively correct. If you are adding metadata to
506fa11d837SChandler Carruth // LLVM which pertains to stores, you almost certainly want to add it
507fa11d837SChandler Carruth // here.
508fa11d837SChandler Carruth switch (ID) {
509fa11d837SChandler Carruth case LLVMContext::MD_dbg:
510fa11d837SChandler Carruth case LLVMContext::MD_tbaa:
511fa11d837SChandler Carruth case LLVMContext::MD_prof:
512fa11d837SChandler Carruth case LLVMContext::MD_fpmath:
513fa11d837SChandler Carruth case LLVMContext::MD_tbaa_struct:
514fa11d837SChandler Carruth case LLVMContext::MD_alias_scope:
515fa11d837SChandler Carruth case LLVMContext::MD_noalias:
516fa11d837SChandler Carruth case LLVMContext::MD_nontemporal:
517fa11d837SChandler Carruth case LLVMContext::MD_mem_parallel_loop_access:
51819942710SMichael Kruse case LLVMContext::MD_access_group:
519fa11d837SChandler Carruth // All of these directly apply.
520fa11d837SChandler Carruth NewStore->setMetadata(ID, N);
521fa11d837SChandler Carruth break;
522fa11d837SChandler Carruth case LLVMContext::MD_invariant_load:
52387fdafc7SChandler Carruth case LLVMContext::MD_nonnull:
52462a0ec16SJuneyoung Lee case LLVMContext::MD_noundef:
525fa11d837SChandler Carruth case LLVMContext::MD_range:
5265c5011d5SArtur Pilipenko case LLVMContext::MD_align:
5275c5011d5SArtur Pilipenko case LLVMContext::MD_dereferenceable:
5285c5011d5SArtur Pilipenko case LLVMContext::MD_dereferenceable_or_null:
52987fdafc7SChandler Carruth // These don't apply for stores.
530fa11d837SChandler Carruth break;
531fa11d837SChandler Carruth }
532fa11d837SChandler Carruth }
533fa11d837SChandler Carruth
534fa11d837SChandler Carruth return NewStore;
535fa11d837SChandler Carruth }
536fa11d837SChandler Carruth
537ec95c6ccSAlexey Bataev /// Returns true if the instruction represents a minmax pattern like:
538ec95c6ccSAlexey Bataev /// select ((cmp load V1, load V2), V1, V2).
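/// For instance (illustrative IR), given pointers %p1 and %p2:
///   %a = load i32, i32* %p1
///   %b = load i32, i32* %p2
///   %c = icmp slt i32 %a, %b
///   %m = select i1 %c, i32* %p1, i32* %p2
/// this returns true with LoadTy set to i32.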
53902f644c5SCraig Topper static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
540ec95c6ccSAlexey Bataev assert(V->getType()->isPointerTy() && "Expected pointer type.");
541ec95c6ccSAlexey Bataev // Ignore possible ty* to ixx* bitcast.
5422a6c8715SSebastian Neubauer V = InstCombiner::peekThroughBitcast(V);
543ec95c6ccSAlexey Bataev // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
544ec95c6ccSAlexey Bataev // pattern.
545ec95c6ccSAlexey Bataev CmpInst::Predicate Pred;
546ec95c6ccSAlexey Bataev Instruction *L1;
547ec95c6ccSAlexey Bataev Instruction *L2;
548ec95c6ccSAlexey Bataev Value *LHS;
549ec95c6ccSAlexey Bataev Value *RHS;
550ec95c6ccSAlexey Bataev if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
551ec95c6ccSAlexey Bataev m_Value(LHS), m_Value(RHS))))
552ec95c6ccSAlexey Bataev return false;
55302f644c5SCraig Topper LoadTy = L1->getType();
554ec95c6ccSAlexey Bataev return (match(L1, m_Load(m_Specific(LHS))) &&
555ec95c6ccSAlexey Bataev match(L2, m_Load(m_Specific(RHS)))) ||
556ec95c6ccSAlexey Bataev (match(L1, m_Load(m_Specific(RHS))) &&
557ec95c6ccSAlexey Bataev match(L2, m_Load(m_Specific(LHS))));
558ec95c6ccSAlexey Bataev }
559ec95c6ccSAlexey Bataev
5605f8f34e4SAdrian Prantl /// Combine loads to match the type of their uses' value after looking
5612f75fcfeSChandler Carruth /// through intervening bitcasts.
5622f75fcfeSChandler Carruth ///
5632f75fcfeSChandler Carruth /// The core idea here is that if the result of a load is used in an operation,
5642f75fcfeSChandler Carruth /// we should load the type most conducive to that operation. For example, when
5652f75fcfeSChandler Carruth /// loading an integer and converting that immediately to a pointer, we should
5662f75fcfeSChandler Carruth /// instead directly load a pointer.
5672f75fcfeSChandler Carruth ///
5682f75fcfeSChandler Carruth /// However, this routine must never change the width of a load or the number of
5692f75fcfeSChandler Carruth /// loads as that would introduce a semantic change. This combine is expected to
5702f75fcfeSChandler Carruth /// be a semantic no-op which just allows loads to more closely model the types
5712f75fcfeSChandler Carruth /// of their consuming operations.
5722f75fcfeSChandler Carruth ///
5732f75fcfeSChandler Carruth /// Currently, we also refuse to change the precise type used for an atomic load
5742f75fcfeSChandler Carruth /// or a volatile load. This is debatable, and might be reasonable to change
5752f75fcfeSChandler Carruth /// later. However, it is risky in case some backend or other part of LLVM is
5762f75fcfeSChandler Carruth /// relying on the exact type loaded to select appropriate atomic operations.
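///
/// A small sketch of the intended effect (hypothetical values): a load whose
/// only use is a no-op bitcast of the loaded value,
///   %x = load i32, i32* %p
///   %f = bitcast i32 %x to float
/// is rewritten to load float directly and the bitcast is erased. Casts that
/// change between pointer and integer types are deliberately left alone.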
5772a6c8715SSebastian Neubauer static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
5782a6c8715SSebastian Neubauer LoadInst &LI) {
5796f4d0088SPhilip Reames // FIXME: We could probably with some care handle both volatile and ordered
5806f4d0088SPhilip Reames // atomic loads here but it isn't clear that this is important.
5816f4d0088SPhilip Reames if (!LI.isUnordered())
582f40110f4SCraig Topper return nullptr;
583a65e2f73SChris Lattner
5842f75fcfeSChandler Carruth if (LI.use_empty())
5852f75fcfeSChandler Carruth return nullptr;
586a65e2f73SChris Lattner
5875d335559SArnold Schwaighofer // swifterror values can't be bitcasted.
5885d335559SArnold Schwaighofer if (LI.getPointerOperand()->isSwiftError())
5895d335559SArnold Schwaighofer return nullptr;
5905d335559SArnold Schwaighofer
591a28d91d8SMehdi Amini const DataLayout &DL = IC.getDataLayout();
592cd8522efSChandler Carruth
5932f75fcfeSChandler Carruth // Fold away bit casts of the loaded value by loading the desired type.
594544a6aa2SRoman Lebedev // Note that we should not do this for pointer<->integer casts,
595544a6aa2SRoman Lebedev // because that would result in type punning.
596055644ccSLuo, Yuanke if (LI.hasOneUse()) {
597055644ccSLuo, Yuanke // Don't transform when the type is x86_amx; this keeps the pass that
598055644ccSLuo, Yuanke // lowers the x86_amx type happy.
599055644ccSLuo, Yuanke if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
600055644ccSLuo, Yuanke assert(!LI.getType()->isX86_AMXTy() &&
601055644ccSLuo, Yuanke "load from x86_amx* should not happen!");
602055644ccSLuo, Yuanke if (BC->getType()->isX86_AMXTy())
603055644ccSLuo, Yuanke return nullptr;
604055644ccSLuo, Yuanke }
605055644ccSLuo, Yuanke
60689e92d21SPhilip Reames if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
607544a6aa2SRoman Lebedev if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() ==
608544a6aa2SRoman Lebedev CI->getDestTy()->isPtrOrPtrVectorTy())
60989e92d21SPhilip Reames if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
610b4dd928fSNikita Popov LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
611490cfbe2SQuentin Colombet CI->replaceAllUsesWith(NewLoad);
612490cfbe2SQuentin Colombet IC.eraseInstFromFunction(*CI);
6132f75fcfeSChandler Carruth return &LI;
614a65e2f73SChris Lattner }
615055644ccSLuo, Yuanke }
616a65e2f73SChris Lattner
617a7f247eaSChandler Carruth // FIXME: We should also canonicalize loads of vectors when their elements are
618a7f247eaSChandler Carruth // cast to other types.
619f40110f4SCraig Topper return nullptr;
620a65e2f73SChris Lattner }
621a65e2f73SChris Lattner
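/// Unpack a load of a small aggregate into loads of its elements.
///
/// A rough sketch (illustrative IR): for a padding-free struct,
///   %v = load { i32, i32 }, { i32, i32 }* %p
/// becomes two element loads through inbounds GEPs whose results are
/// reassembled with insertvalue; structs with padding and overly large
/// arrays are left untouched.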
6222a6c8715SSebastian Neubauer static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
6232668a487SMehdi Amini // FIXME: We could probably with some care handle both volatile and atomic
6242668a487SMehdi Amini // loads here but it isn't clear that this is important.
6252668a487SMehdi Amini if (!LI.isSimple())
6262668a487SMehdi Amini return nullptr;
6272668a487SMehdi Amini
6282668a487SMehdi Amini Type *T = LI.getType();
6292668a487SMehdi Amini if (!T->isAggregateType())
6302668a487SMehdi Amini return nullptr;
6312668a487SMehdi Amini
632c1263534SBenjamin Kramer StringRef Name = LI.getName();
6332668a487SMehdi Amini
6342668a487SMehdi Amini if (auto *ST = dyn_cast<StructType>(T)) {
6352668a487SMehdi Amini // If the struct only has one element, we unpack.
63661a7d629SAmaury Sechet auto NumElements = ST->getNumElements();
63761a7d629SAmaury Sechet if (NumElements == 1) {
638b4dd928fSNikita Popov LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
6392668a487SMehdi Amini ".unpack");
6400fc624f0SNikita Popov NewLoad->setAAMetadata(LI.getAAMetadata());
641bb4069e4SCraig Topper return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
64261a7d629SAmaury Sechet UndefValue::get(T), NewLoad, 0, Name));
6432668a487SMehdi Amini }
6441c131b37SMehdi Amini
6451c131b37SMehdi Amini // We don't want to break loads with padding here as we'd lose
6461c131b37SMehdi Amini // the knowledge that padding exists for the rest of the pipeline.
6471c131b37SMehdi Amini const DataLayout &DL = IC.getDataLayout();
6481c131b37SMehdi Amini auto *SL = DL.getStructLayout(ST);
6491c131b37SMehdi Amini if (SL->hasPadding())
6501c131b37SMehdi Amini return nullptr;
6511c131b37SMehdi Amini
652604f4497SNikita Popov const auto Align = LI.getAlign();
6531c131b37SMehdi Amini auto *Addr = LI.getPointerOperand();
65461a7d629SAmaury Sechet auto *IdxType = Type::getInt32Ty(T->getContext());
6551c131b37SMehdi Amini auto *Zero = ConstantInt::get(IdxType, 0);
65661a7d629SAmaury Sechet
65761a7d629SAmaury Sechet Value *V = UndefValue::get(T);
65861a7d629SAmaury Sechet for (unsigned i = 0; i < NumElements; i++) {
6591c131b37SMehdi Amini Value *Indices[2] = {
6601c131b37SMehdi Amini Zero,
6611c131b37SMehdi Amini ConstantInt::get(IdxType, i),
6621c131b37SMehdi Amini };
663bb4069e4SCraig Topper auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
664c1263534SBenjamin Kramer Name + ".elt");
665279fa8e0SGuillaume Chatelet auto *L = IC.Builder.CreateAlignedLoad(
666279fa8e0SGuillaume Chatelet ST->getElementType(i), Ptr,
667279fa8e0SGuillaume Chatelet commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
668a236dae5SKeno Fischer // Propagate AA metadata. It'll still be valid on the narrowed load.
6690fc624f0SNikita Popov L->setAAMetadata(LI.getAAMetadata());
670bb4069e4SCraig Topper V = IC.Builder.CreateInsertValue(V, L, i);
6711c131b37SMehdi Amini }
6721c131b37SMehdi Amini
6731c131b37SMehdi Amini V->setName(Name);
6744b198802SSanjay Patel return IC.replaceInstUsesWith(LI, V);
6752668a487SMehdi Amini }
6762668a487SMehdi Amini
67758fb038bSDavid Majnemer if (auto *AT = dyn_cast<ArrayType>(T)) {
6787cd3fe7dSAmaury Sechet auto *ET = AT->getElementType();
6797cd3fe7dSAmaury Sechet auto NumElements = AT->getNumElements();
6807cd3fe7dSAmaury Sechet if (NumElements == 1) {
681b4dd928fSNikita Popov LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
6820fc624f0SNikita Popov NewLoad->setAAMetadata(LI.getAAMetadata());
683bb4069e4SCraig Topper return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
6847cd3fe7dSAmaury Sechet UndefValue::get(T), NewLoad, 0, Name));
68558fb038bSDavid Majnemer }
6867cd3fe7dSAmaury Sechet
687da114122SDavide Italiano // Bail out if the array is too large. Ideally we would like to optimize
688da114122SDavide Italiano // arrays of arbitrary size but this has a terrible impact on compile time.
689da114122SDavide Italiano // The threshold here is chosen arbitrarily and may need a little bit of
690da114122SDavide Italiano // tuning.
6912133bf55SDavide Italiano if (NumElements > IC.MaxArraySizeForCombine)
692da114122SDavide Italiano return nullptr;
693da114122SDavide Italiano
6947cd3fe7dSAmaury Sechet const DataLayout &DL = IC.getDataLayout();
6957cd3fe7dSAmaury Sechet auto EltSize = DL.getTypeAllocSize(ET);
696604f4497SNikita Popov const auto Align = LI.getAlign();
6977cd3fe7dSAmaury Sechet
6987cd3fe7dSAmaury Sechet auto *Addr = LI.getPointerOperand();
6997cd3fe7dSAmaury Sechet auto *IdxType = Type::getInt64Ty(T->getContext());
7007cd3fe7dSAmaury Sechet auto *Zero = ConstantInt::get(IdxType, 0);
7017cd3fe7dSAmaury Sechet
7027cd3fe7dSAmaury Sechet Value *V = UndefValue::get(T);
7037cd3fe7dSAmaury Sechet uint64_t Offset = 0;
7047cd3fe7dSAmaury Sechet for (uint64_t i = 0; i < NumElements; i++) {
7057cd3fe7dSAmaury Sechet Value *Indices[2] = {
7067cd3fe7dSAmaury Sechet Zero,
7077cd3fe7dSAmaury Sechet ConstantInt::get(IdxType, i),
7087cd3fe7dSAmaury Sechet };
709bb4069e4SCraig Topper auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
710c1263534SBenjamin Kramer Name + ".elt");
711279fa8e0SGuillaume Chatelet auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
712279fa8e0SGuillaume Chatelet commonAlignment(Align, Offset),
713279fa8e0SGuillaume Chatelet Name + ".unpack");
7140fc624f0SNikita Popov L->setAAMetadata(LI.getAAMetadata());
715bb4069e4SCraig Topper V = IC.Builder.CreateInsertValue(V, L, i);
7167cd3fe7dSAmaury Sechet Offset += EltSize;
7177cd3fe7dSAmaury Sechet }
7187cd3fe7dSAmaury Sechet
7197cd3fe7dSAmaury Sechet V->setName(Name);
7207cd3fe7dSAmaury Sechet return IC.replaceInstUsesWith(LI, V);
72158fb038bSDavid Majnemer }
72258fb038bSDavid Majnemer
7232668a487SMehdi Amini return nullptr;
7242668a487SMehdi Amini }
7252668a487SMehdi Amini
726847e05f5SHal Finkel // If we can determine that all possible objects pointed to by the provided
727847e05f5SHal Finkel // pointer value are not only dereferenceable but also definitively less than
728847e05f5SHal Finkel // or equal to the provided maximum size, then return true. Otherwise, return
729847e05f5SHal Finkel // false (only constant global values and allocas can be bounded this way).
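// For instance, a pointer that can only be an "alloca [4 x i32]" or a
// constant global with a 16-byte initializer is provably <= 16 bytes,
// whereas anything reachable through an interposable alias or an unknown
// call is not.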
730847e05f5SHal Finkel //
731847e05f5SHal Finkel // FIXME: This should probably live in ValueTracking (or similar).
732847e05f5SHal Finkel static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
733a28d91d8SMehdi Amini const DataLayout &DL) {
734847e05f5SHal Finkel SmallPtrSet<Value *, 4> Visited;
735847e05f5SHal Finkel SmallVector<Value *, 4> Worklist(1, V);
736847e05f5SHal Finkel
737847e05f5SHal Finkel do {
738847e05f5SHal Finkel Value *P = Worklist.pop_back_val();
739847e05f5SHal Finkel P = P->stripPointerCasts();
740847e05f5SHal Finkel
741847e05f5SHal Finkel if (!Visited.insert(P).second)
742847e05f5SHal Finkel continue;
743847e05f5SHal Finkel
744847e05f5SHal Finkel if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
745847e05f5SHal Finkel Worklist.push_back(SI->getTrueValue());
746847e05f5SHal Finkel Worklist.push_back(SI->getFalseValue());
747847e05f5SHal Finkel continue;
748847e05f5SHal Finkel }
749847e05f5SHal Finkel
750847e05f5SHal Finkel if (PHINode *PN = dyn_cast<PHINode>(P)) {
751e53472deSKazu Hirata append_range(Worklist, PN->incoming_values());
752847e05f5SHal Finkel continue;
753847e05f5SHal Finkel }
754847e05f5SHal Finkel
755847e05f5SHal Finkel if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
75699042473SSanjoy Das if (GA->isInterposable())
757847e05f5SHal Finkel return false;
758847e05f5SHal Finkel Worklist.push_back(GA->getAliasee());
759847e05f5SHal Finkel continue;
760847e05f5SHal Finkel }
761847e05f5SHal Finkel
762847e05f5SHal Finkel // If we know how big this object is, and it is less than MaxSize, continue
763847e05f5SHal Finkel // searching. Otherwise, return false.
764847e05f5SHal Finkel if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
765847e05f5SHal Finkel if (!AI->getAllocatedType()->isSized())
766847e05f5SHal Finkel return false;
767847e05f5SHal Finkel
768847e05f5SHal Finkel ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
769847e05f5SHal Finkel if (!CS)
770847e05f5SHal Finkel return false;
771847e05f5SHal Finkel
772a28d91d8SMehdi Amini uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
773847e05f5SHal Finkel // Make sure that, even if the multiplication below would wrap as an
774847e05f5SHal Finkel // uint64_t, we still do the right thing.
775*6bec3e93SJay Foad if ((CS->getValue().zext(128) * APInt(128, TypeSize)).ugt(MaxSize))
776847e05f5SHal Finkel return false;
777847e05f5SHal Finkel continue;
778847e05f5SHal Finkel }
779847e05f5SHal Finkel
780847e05f5SHal Finkel if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
781847e05f5SHal Finkel if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
782847e05f5SHal Finkel return false;
783847e05f5SHal Finkel
7845f6eaac6SManuel Jacob uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
785847e05f5SHal Finkel if (InitSize > MaxSize)
786847e05f5SHal Finkel return false;
787847e05f5SHal Finkel continue;
788847e05f5SHal Finkel }
789847e05f5SHal Finkel
790847e05f5SHal Finkel return false;
791847e05f5SHal Finkel } while (!Worklist.empty());
792847e05f5SHal Finkel
793847e05f5SHal Finkel return true;
794847e05f5SHal Finkel }
795847e05f5SHal Finkel
796847e05f5SHal Finkel // If we're indexing into an object of a known size, and the outer index is
797847e05f5SHal Finkel // not a constant, but having any value but zero would lead to undefined
798847e05f5SHal Finkel // behavior, replace it with zero.
799847e05f5SHal Finkel //
800847e05f5SHal Finkel // For example, if we have:
801847e05f5SHal Finkel // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
802847e05f5SHal Finkel // ...
803847e05f5SHal Finkel // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
804847e05f5SHal Finkel // ... = load i32* %arrayidx, align 4
805847e05f5SHal Finkel // Then we know that we can replace %x in the GEP with i64 0.
806847e05f5SHal Finkel //
807847e05f5SHal Finkel // FIXME: We could fold any GEP index to zero that would cause UB if it were
808847e05f5SHal Finkel // not zero. Currently, we only handle the first such index. Also, we could
809847e05f5SHal Finkel // also search through non-zero constant indices if we kept track of the
810847e05f5SHal Finkel // offsets those indices implied.
8112a6c8715SSebastian Neubauer static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
8122a6c8715SSebastian Neubauer GetElementPtrInst *GEPI, Instruction *MemI,
8132a6c8715SSebastian Neubauer unsigned &Idx) {
814a28d91d8SMehdi Amini if (GEPI->getNumOperands() < 2)
815847e05f5SHal Finkel return false;
816847e05f5SHal Finkel
817847e05f5SHal Finkel // Find the first non-zero index of a GEP. If all indices are zero, return
818847e05f5SHal Finkel // one past the last index.
819847e05f5SHal Finkel auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
820847e05f5SHal Finkel unsigned I = 1;
821847e05f5SHal Finkel for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
822847e05f5SHal Finkel Value *V = GEPI->getOperand(I);
823847e05f5SHal Finkel if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
824847e05f5SHal Finkel if (CI->isZero())
825847e05f5SHal Finkel continue;
826847e05f5SHal Finkel
827847e05f5SHal Finkel break;
828847e05f5SHal Finkel }
829847e05f5SHal Finkel
830847e05f5SHal Finkel return I;
831847e05f5SHal Finkel };
832847e05f5SHal Finkel
833847e05f5SHal Finkel // Skip through initial 'zero' indices, and find the corresponding pointer
834847e05f5SHal Finkel // type. See if the next index is not a constant.
835847e05f5SHal Finkel Idx = FirstNZIdx(GEPI);
836847e05f5SHal Finkel if (Idx == GEPI->getNumOperands())
837847e05f5SHal Finkel return false;
838847e05f5SHal Finkel if (isa<Constant>(GEPI->getOperand(Idx)))
839847e05f5SHal Finkel return false;
840847e05f5SHal Finkel
841847e05f5SHal Finkel SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
8420f835055SJoe Ellis Type *SourceElementType = GEPI->getSourceElementType();
8430f835055SJoe Ellis // Size information about scalable vectors is not available, so we cannot
8440f835055SJoe Ellis // deduce whether indexing at n is undefined behaviour or not. Bail out.
8450f835055SJoe Ellis if (isa<ScalableVectorType>(SourceElementType))
8460f835055SJoe Ellis return false;
8470f835055SJoe Ellis
8480f835055SJoe Ellis Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
849847e05f5SHal Finkel if (!AllocTy || !AllocTy->isSized())
850847e05f5SHal Finkel return false;
851a28d91d8SMehdi Amini const DataLayout &DL = IC.getDataLayout();
8520f835055SJoe Ellis uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedSize();
853847e05f5SHal Finkel
854847e05f5SHal Finkel // If there are more indices after the one we might replace with a zero, make
855847e05f5SHal Finkel // sure they're all non-negative. If any of them are negative, the overall
856847e05f5SHal Finkel // address being computed might be before the base address determined by the
857847e05f5SHal Finkel // first non-zero index.
858847e05f5SHal Finkel auto IsAllNonNegative = [&]() {
859847e05f5SHal Finkel for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
8601a36b7d8SCraig Topper KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
8611a36b7d8SCraig Topper if (Known.isNonNegative())
862847e05f5SHal Finkel continue;
863847e05f5SHal Finkel return false;
864847e05f5SHal Finkel }
865847e05f5SHal Finkel
866847e05f5SHal Finkel return true;
867847e05f5SHal Finkel };
868847e05f5SHal Finkel
869847e05f5SHal Finkel // FIXME: If the GEP is not inbounds, and there are extra indices after the
870847e05f5SHal Finkel // one we'll replace, those could cause the address computation to wrap
871847e05f5SHal Finkel // (rendering the IsAllNonNegative() check below insufficient). We can do
872e9ffb45bSBruce Mitchener // better, ignoring zero indices (and other indices we can prove small
873847e05f5SHal Finkel // enough not to wrap).
874847e05f5SHal Finkel if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
875847e05f5SHal Finkel return false;
876847e05f5SHal Finkel
877847e05f5SHal Finkel // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
878847e05f5SHal Finkel // also known to be dereferenceable.
879847e05f5SHal Finkel return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
880847e05f5SHal Finkel IsAllNonNegative();
881847e05f5SHal Finkel }
882847e05f5SHal Finkel
883847e05f5SHal Finkel // If we're indexing into an object with a variable index for the memory
884847e05f5SHal Finkel // access, but the object has only one element, we can assume that the index
885847e05f5SHal Finkel // will always be zero. If we replace the GEP, return it.
886847e05f5SHal Finkel template <typename T>
8872a6c8715SSebastian Neubauer static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
888847e05f5SHal Finkel T &MemI) {
889847e05f5SHal Finkel if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
890847e05f5SHal Finkel unsigned Idx;
891847e05f5SHal Finkel if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
892847e05f5SHal Finkel Instruction *NewGEPI = GEPI->clone();
893847e05f5SHal Finkel NewGEPI->setOperand(Idx,
894847e05f5SHal Finkel ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
895847e05f5SHal Finkel NewGEPI->insertBefore(GEPI);
896847e05f5SHal Finkel MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
897847e05f5SHal Finkel return NewGEPI;
898847e05f5SHal Finkel }
899847e05f5SHal Finkel }
900847e05f5SHal Finkel
901847e05f5SHal Finkel return nullptr;
902847e05f5SHal Finkel }
903847e05f5SHal Finkel
9042dd9835fSAnna Thomas static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
90590177912SNikita Popov if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
90690177912SNikita Popov return false;
90790177912SNikita Popov
90890177912SNikita Popov auto *Ptr = SI.getPointerOperand();
90990177912SNikita Popov if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
91090177912SNikita Popov Ptr = GEPI->getOperand(0);
91190177912SNikita Popov return (isa<ConstantPointerNull>(Ptr) &&
91290177912SNikita Popov !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
9132dd9835fSAnna Thomas }
9142dd9835fSAnna Thomas
915ffcb4df2SDavide Italiano static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
91690177912SNikita Popov if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
91790177912SNikita Popov const Value *GEPI0 = GEPI->getOperand(0);
918ef2f8433SNikita Popov if (isa<ConstantPointerNull>(GEPI0) &&
91990177912SNikita Popov !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
92090177912SNikita Popov return true;
92190177912SNikita Popov }
92290177912SNikita Popov if (isa<UndefValue>(Op) ||
92377eeac3dSManoj Gupta (isa<ConstantPointerNull>(Op) &&
92490177912SNikita Popov !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
92590177912SNikita Popov return true;
92690177912SNikita Popov return false;
927ffcb4df2SDavide Italiano }
928ffcb4df2SDavide Italiano
9292a6c8715SSebastian Neubauer Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
930a65e2f73SChris Lattner Value *Op = LI.getOperand(0);
931a65e2f73SChris Lattner
9322f75fcfeSChandler Carruth // Try to canonicalize the loaded type.
9332f75fcfeSChandler Carruth if (Instruction *Res = combineLoadToOperationType(*this, LI))
9342f75fcfeSChandler Carruth return Res;
9352f75fcfeSChandler Carruth
936a65e2f73SChris Lattner // Attempt to improve the alignment.
93768b2e507SCraig Topper Align KnownAlign = getOrEnforceKnownAlignment(
93868b2e507SCraig Topper Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
939604f4497SNikita Popov if (KnownAlign > LI.getAlign())
94068b2e507SCraig Topper LI.setAlignment(KnownAlign);
941a65e2f73SChris Lattner
942847e05f5SHal Finkel // Replace GEP indices if possible.
943847e05f5SHal Finkel if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
944e6c9ab4fSNikita Popov Worklist.push(NewGEPI);
945847e05f5SHal Finkel return &LI;
946847e05f5SHal Finkel }
947847e05f5SHal Finkel
9482668a487SMehdi Amini if (Instruction *Res = unpackLoadToAggregate(*this, LI))
9492668a487SMehdi Amini return Res;
9502668a487SMehdi Amini
951a65e2f73SChris Lattner // Do really simple store-to-load forwarding and load CSE, to catch cases
95275b5d27bSDuncan Sands // where there are several consecutive memory accesses to the same location,
953a65e2f73SChris Lattner // separated by a few arithmetic operations.
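// Illustrative sketch (hypothetical IR; names are placeholders):
//   store i32 %x, i32* %p
//   ; ... a few arithmetic instructions, no intervening writes to %p ...
//   %y = load i32, i32* %p   ; %y can be replaced by %x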
954bd254a6fSEli Friedman bool IsLoadCSE = false;
955e0615bcdSNikita Popov if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
956b38ad88eSSanjay Patel if (IsLoadCSE)
957406f1ff1SFlorian Hahn combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
958a91fd099SBjorn Steinbrink
9594b198802SSanjay Patel return replaceInstUsesWith(
960bb4069e4SCraig Topper LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
9611a3c2c41SChandler Carruth LI.getName() + ".cast"));
962a91fd099SBjorn Steinbrink }
963a65e2f73SChris Lattner
9643ac07184SPhilip Reames // None of the following transforms are legal for volatile/ordered atomic
9653ac07184SPhilip Reames // loads. Most of them do apply for unordered atomics.
9663ac07184SPhilip Reames if (!LI.isUnordered()) return nullptr;
967ac55090eSPhilip Reames
968a65e2f73SChris Lattner // load(gep null, ...) -> unreachable
969ffcb4df2SDavide Italiano // load null/undef -> unreachable
970ffcb4df2SDavide Italiano // TODO: Consider a target hook for valid address spaces for these transforms.
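// For example (hypothetical IR), in an address space where null is not
// defined, `%v = load i32, i32* null` cannot execute, so the enclosing code
// must be unreachable.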
971ffcb4df2SDavide Italiano if (canSimplifyNullLoadOrGEP(LI, Op)) {
972a65e2f73SChris Lattner // Insert a new store-to-null instruction before the load to indicate
973a65e2f73SChris Lattner // that this code is not reachable. We do this instead of inserting
974a65e2f73SChris Lattner // an unreachable instruction directly because we cannot modify the
975a65e2f73SChris Lattner // CFG.
976ce192cedSJuneyoung Lee StoreInst *SI = new StoreInst(PoisonValue::get(LI.getType()),
977a65e2f73SChris Lattner Constant::getNullValue(Op->getType()), &LI);
978984f1dc3SWeiming Zhao SI->setDebugLoc(LI.getDebugLoc());
979ce192cedSJuneyoung Lee return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
980a65e2f73SChris Lattner }
981a65e2f73SChris Lattner
982a65e2f73SChris Lattner if (Op->hasOneUse()) {
983a65e2f73SChris Lattner // Change select and PHI nodes to select values instead of addresses: this
984a65e2f73SChris Lattner // helps alias analysis out a lot, allows many other simplifications, and
985a65e2f73SChris Lattner // exposes redundancy in the code.
986a65e2f73SChris Lattner //
987a65e2f73SChris Lattner // Note that we cannot do the transformation unless we know that the
988a65e2f73SChris Lattner // introduced loads cannot trap! Something like this is valid as long as
989a65e2f73SChris Lattner // the condition is always false: load (select bool %C, int* null, int* %G),
990a65e2f73SChris Lattner // but it would not be valid if we transformed it to load from null
991a65e2f73SChris Lattner // unconditionally.
992a65e2f73SChris Lattner //
993a65e2f73SChris Lattner if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
994a65e2f73SChris Lattner // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
995accc6b55SEli Friedman Align Alignment = LI.getAlign();
996301b4128SGuillaume Chatelet if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
997301b4128SGuillaume Chatelet Alignment, DL, SI) &&
998301b4128SGuillaume Chatelet isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
999301b4128SGuillaume Chatelet Alignment, DL, SI)) {
100014359ef1SJames Y Knight LoadInst *V1 =
100114359ef1SJames Y Knight Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1002a65e2f73SChris Lattner SI->getOperand(1)->getName() + ".val");
100314359ef1SJames Y Knight LoadInst *V2 =
100414359ef1SJames Y Knight Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1005a65e2f73SChris Lattner SI->getOperand(2)->getName() + ".val");
1006a98c7eadSPhilip Reames assert(LI.isUnordered() && "implied by above");
1007301b4128SGuillaume Chatelet V1->setAlignment(Alignment);
1008bb80d3e1SKonstantin Zhuravlyov V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1009301b4128SGuillaume Chatelet V2->setAlignment(Alignment);
1010bb80d3e1SKonstantin Zhuravlyov V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1011a65e2f73SChris Lattner return SelectInst::Create(SI->getCondition(), V1, V2);
1012a65e2f73SChris Lattner }
1013a65e2f73SChris Lattner
1014a65e2f73SChris Lattner // load (select (cond, null, P)) -> load P
10155ad26c35SPhilip Reames if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
101677eeac3dSManoj Gupta !NullPointerIsDefined(SI->getFunction(),
1017878cb38aSNikita Popov LI.getPointerAddressSpace()))
1018878cb38aSNikita Popov return replaceOperand(LI, 0, SI->getOperand(2));
1019a65e2f73SChris Lattner
1020a65e2f73SChris Lattner // load (select (cond, P, null)) -> load P
10215ad26c35SPhilip Reames if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
102277eeac3dSManoj Gupta !NullPointerIsDefined(SI->getFunction(),
1023878cb38aSNikita Popov LI.getPointerAddressSpace()))
1024878cb38aSNikita Popov return replaceOperand(LI, 0, SI->getOperand(1));
1025a65e2f73SChris Lattner }
1026a65e2f73SChris Lattner }
1027f40110f4SCraig Topper return nullptr;
1028a65e2f73SChris Lattner }
1029a65e2f73SChris Lattner
10305f8f34e4SAdrian Prantl /// Look for an extractelement/insertvalue sequence that acts like a bitcast.
1031be0490a6SArch D. Robison ///
1032be0490a6SArch D. Robison /// \returns underlying value that was "cast", or nullptr otherwise.
1033be0490a6SArch D. Robison ///
1034be0490a6SArch D. Robison /// For example, if we have:
1035be0490a6SArch D. Robison ///
1036be0490a6SArch D. Robison /// %E0 = extractelement <2 x double> %U, i32 0
1037be0490a6SArch D. Robison /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1038be0490a6SArch D. Robison /// %E1 = extractelement <2 x double> %U, i32 1
1039be0490a6SArch D. Robison /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1040be0490a6SArch D. Robison ///
1041be0490a6SArch D. Robison /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1042be0490a6SArch D. Robison /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1043be0490a6SArch D. Robison /// Note that %U may contain non-undef values where %V1 has undef.
10442a6c8715SSebastian Neubauer static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1045be0490a6SArch D. Robison Value *U = nullptr;
1046be0490a6SArch D. Robison while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1047be0490a6SArch D. Robison auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1048be0490a6SArch D. Robison if (!E)
1049be0490a6SArch D. Robison return nullptr;
1050be0490a6SArch D. Robison auto *W = E->getVectorOperand();
1051be0490a6SArch D. Robison if (!U)
1052be0490a6SArch D. Robison U = W;
1053be0490a6SArch D. Robison else if (U != W)
1054be0490a6SArch D. Robison return nullptr;
1055be0490a6SArch D. Robison auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1056be0490a6SArch D. Robison if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1057be0490a6SArch D. Robison return nullptr;
1058be0490a6SArch D. Robison V = IV->getAggregateOperand();
1059be0490a6SArch D. Robison }
10601c10201dSJuneyoung Lee if (!match(V, m_Undef()) || !U)
1061be0490a6SArch D. Robison return nullptr;
1062be0490a6SArch D. Robison
1063be0490a6SArch D. Robison auto *UT = cast<VectorType>(U->getType());
1064be0490a6SArch D. Robison auto *VT = V->getType();
1065be0490a6SArch D. Robison // Check that types UT and VT are bitwise isomorphic.
1066be0490a6SArch D. Robison const auto &DL = IC.getDataLayout();
1067be0490a6SArch D. Robison if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1068be0490a6SArch D. Robison return nullptr;
1069be0490a6SArch D. Robison }
1070be0490a6SArch D. Robison if (auto *AT = dyn_cast<ArrayType>(VT)) {
1071640f20b0SChristopher Tetreault if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1072be0490a6SArch D. Robison return nullptr;
1073be0490a6SArch D. Robison } else {
1074be0490a6SArch D. Robison auto *ST = cast<StructType>(VT);
1075640f20b0SChristopher Tetreault if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1076be0490a6SArch D. Robison return nullptr;
1077be0490a6SArch D. Robison for (const auto *EltT : ST->elements()) {
1078be0490a6SArch D. Robison if (EltT != UT->getElementType())
1079be0490a6SArch D. Robison return nullptr;
1080be0490a6SArch D. Robison }
1081be0490a6SArch D. Robison }
1082be0490a6SArch D. Robison return U;
1083be0490a6SArch D. Robison }
1084be0490a6SArch D. Robison
10855f8f34e4SAdrian Prantl /// Combine stores to match the type of value being stored.
10862135b97dSChandler Carruth ///
10872135b97dSChandler Carruth /// The core idea here is that the memory does not have any intrinsic type, so
10882135b97dSChandler Carruth /// where we can, we should match the type of a store to the type of the value
10892135b97dSChandler Carruth /// being stored.
10902135b97dSChandler Carruth ///
10912135b97dSChandler Carruth /// However, this routine must never change the width of a store or the number of
10922135b97dSChandler Carruth /// stores as that would introduce a semantic change. This combine is expected to
10932135b97dSChandler Carruth /// be a semantic no-op which just allows stores to more closely model the types
10942135b97dSChandler Carruth /// of their incoming values.
10952135b97dSChandler Carruth ///
10962135b97dSChandler Carruth /// Currently, we also refuse to change the precise type used for an atomic or
10972135b97dSChandler Carruth /// volatile store. This is debatable, and might be reasonable to change later.
10982135b97dSChandler Carruth /// However, it is risky in case some backend or other part of LLVM is relying
10992135b97dSChandler Carruth /// on the exact type stored to select appropriate atomic operations.
11002135b97dSChandler Carruth ///
11012135b97dSChandler Carruth /// \returns true if the store was successfully combined away. This indicates
11022135b97dSChandler Carruth /// the caller must erase the store instruction. We have to let the caller erase
1103e9ffb45bSBruce Mitchener /// the store instruction as otherwise there is no way to signal whether it was
11042135b97dSChandler Carruth /// combined or not: IC.EraseInstFromFunction returns a null pointer.
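///
/// Illustrative sketch (hypothetical IR; names are placeholders):
///   %i = bitcast float %f to i32
///   store i32 %i, i32* %p
/// is rewritten to store %f as a float through a suitably cast pointer, so
/// the store matches the type of the incoming value.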
11052a6c8715SSebastian Neubauer static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
11066f4d0088SPhilip Reames // FIXME: With some care, we could probably handle both volatile and ordered
11076f4d0088SPhilip Reames // atomic stores here, but it isn't clear that this is important.
11086f4d0088SPhilip Reames if (!SI.isUnordered())
11092135b97dSChandler Carruth return false;
11102135b97dSChandler Carruth
11115d335559SArnold Schwaighofer // swifterror values can't be bitcasted.
11125d335559SArnold Schwaighofer if (SI.getPointerOperand()->isSwiftError())
11135d335559SArnold Schwaighofer return false;
11145d335559SArnold Schwaighofer
11152135b97dSChandler Carruth Value *V = SI.getValueOperand();
11162135b97dSChandler Carruth
11172135b97dSChandler Carruth // Fold away bit casts of the stored value by storing the original type.
11182135b97dSChandler Carruth if (auto *BC = dyn_cast<BitCastInst>(V)) {
1119055644ccSLuo, Yuanke assert(!BC->getType()->isX86_AMXTy() &&
1120055644ccSLuo, Yuanke "store to x86_amx* should not happen!");
11212135b97dSChandler Carruth V = BC->getOperand(0);
1122055644ccSLuo, Yuanke // Don't transform when the type is x86_amx; it makes the pass that lowers
1123981a0bd8SLuo, Yuanke // the x86_amx type happy.
1124055644ccSLuo, Yuanke if (V->getType()->isX86_AMXTy())
1125981a0bd8SLuo, Yuanke return false;
112689e92d21SPhilip Reames if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
11272135b97dSChandler Carruth combineStoreToNewValue(IC, SI, V);
1128816d26feSChandler Carruth return true;
1129a65e2f73SChris Lattner }
113089e92d21SPhilip Reames }
1131a65e2f73SChris Lattner
113289e92d21SPhilip Reames if (Value *U = likeBitCastFromVector(IC, V))
113389e92d21SPhilip Reames if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1134be0490a6SArch D. Robison combineStoreToNewValue(IC, SI, U);
1135be0490a6SArch D. Robison return true;
1136be0490a6SArch D. Robison }
1137be0490a6SArch D. Robison
1138c22d2998SJF Bastien // FIXME: We should also canonicalize stores of vectors when their elements
1139c22d2998SJF Bastien // are cast to other types.
1140816d26feSChandler Carruth return false;
1141a65e2f73SChris Lattner }
1142a65e2f73SChris Lattner
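/// Split a store of an aggregate value into stores of its elements.
/// Illustrative sketch (hypothetical IR; the names and the struct type are
/// placeholders):
///   store { i32, i32 } %agg, { i32, i32 }* %p
/// becomes one store per element through `inbounds` GEPs of %p, provided the
/// struct layout has no padding (and, for arrays, the element count is small).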
11432a6c8715SSebastian Neubauer static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1144b344ac9aSMehdi Amini // FIXME: With some care, we could probably handle both volatile and atomic
1145b344ac9aSMehdi Amini // stores here, but it isn't clear that this is important.
1146b344ac9aSMehdi Amini if (!SI.isSimple())
1147b344ac9aSMehdi Amini return false;
1148b344ac9aSMehdi Amini
1149b344ac9aSMehdi Amini Value *V = SI.getValueOperand();
1150b344ac9aSMehdi Amini Type *T = V->getType();
1151b344ac9aSMehdi Amini
1152b344ac9aSMehdi Amini if (!T->isAggregateType())
1153b344ac9aSMehdi Amini return false;
1154b344ac9aSMehdi Amini
11552668a487SMehdi Amini if (auto *ST = dyn_cast<StructType>(T)) {
1156b344ac9aSMehdi Amini // If the struct only has one element, we unpack it.
11571c131b37SMehdi Amini unsigned Count = ST->getNumElements();
11581c131b37SMehdi Amini if (Count == 1) {
1159bb4069e4SCraig Topper V = IC.Builder.CreateExtractValue(V, 0);
1160b344ac9aSMehdi Amini combineStoreToNewValue(IC, SI, V);
1161b344ac9aSMehdi Amini return true;
1162b344ac9aSMehdi Amini }
11631c131b37SMehdi Amini
11641c131b37SMehdi Amini // We don't want to break up stores with padding here, as we'd lose the
11651c131b37SMehdi Amini // knowledge that padding exists for the rest of the pipeline.
11661c131b37SMehdi Amini const DataLayout &DL = IC.getDataLayout();
11671c131b37SMehdi Amini auto *SL = DL.getStructLayout(ST);
11681c131b37SMehdi Amini if (SL->hasPadding())
11691c131b37SMehdi Amini return false;
11701c131b37SMehdi Amini
1171604f4497SNikita Popov const auto Align = SI.getAlign();
117261a7d629SAmaury Sechet
1173ec6b1fcfSNAKAMURA Takumi SmallString<16> EltName = V->getName();
1174ec6b1fcfSNAKAMURA Takumi EltName += ".elt";
11751c131b37SMehdi Amini auto *Addr = SI.getPointerOperand();
1176ec6b1fcfSNAKAMURA Takumi SmallString<16> AddrName = Addr->getName();
1177ec6b1fcfSNAKAMURA Takumi AddrName += ".repack";
117861a7d629SAmaury Sechet
11791c131b37SMehdi Amini auto *IdxType = Type::getInt32Ty(ST->getContext());
11801c131b37SMehdi Amini auto *Zero = ConstantInt::get(IdxType, 0);
11811c131b37SMehdi Amini for (unsigned i = 0; i < Count; i++) {
11821c131b37SMehdi Amini Value *Indices[2] = {
11831c131b37SMehdi Amini Zero,
11841c131b37SMehdi Amini ConstantInt::get(IdxType, i),
11851c131b37SMehdi Amini };
1186bb4069e4SCraig Topper auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1187da71cb7bSAmaury Sechet AddrName);
1188bb4069e4SCraig Topper auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
118959f95222SGuillaume Chatelet auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1190bb4069e4SCraig Topper llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
11910fc624f0SNikita Popov NS->setAAMetadata(SI.getAAMetadata());
11921c131b37SMehdi Amini }
11931c131b37SMehdi Amini
11941c131b37SMehdi Amini return true;
1195b344ac9aSMehdi Amini }
1196b344ac9aSMehdi Amini
11977536460cSDavid Majnemer if (auto *AT = dyn_cast<ArrayType>(T)) {
11987536460cSDavid Majnemer // If the array only has one element, we unpack it.
11993b8b2ea2SAmaury Sechet auto NumElements = AT->getNumElements();
12003b8b2ea2SAmaury Sechet if (NumElements == 1) {
1201bb4069e4SCraig Topper V = IC.Builder.CreateExtractValue(V, 0);
12027536460cSDavid Majnemer combineStoreToNewValue(IC, SI, V);
12037536460cSDavid Majnemer return true;
12047536460cSDavid Majnemer }
12053b8b2ea2SAmaury Sechet
1206f6988d29SDavide Italiano // Bail out if the array is too large. Ideally we would like to optimize
1207f6988d29SDavide Italiano // arrays of arbitrary size but this has a terrible impact on compile time.
1208f6988d29SDavide Italiano // The threshold here is chosen arbitrarily and may need a little bit of
1209f6988d29SDavide Italiano // tuning.
12102133bf55SDavide Italiano if (NumElements > IC.MaxArraySizeForCombine)
1211f6988d29SDavide Italiano return false;
1212f6988d29SDavide Italiano
12133b8b2ea2SAmaury Sechet const DataLayout &DL = IC.getDataLayout();
12143b8b2ea2SAmaury Sechet auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1215604f4497SNikita Popov const auto Align = SI.getAlign();
12163b8b2ea2SAmaury Sechet
12173b8b2ea2SAmaury Sechet SmallString<16> EltName = V->getName();
12183b8b2ea2SAmaury Sechet EltName += ".elt";
12193b8b2ea2SAmaury Sechet auto *Addr = SI.getPointerOperand();
12203b8b2ea2SAmaury Sechet SmallString<16> AddrName = Addr->getName();
12213b8b2ea2SAmaury Sechet AddrName += ".repack";
12223b8b2ea2SAmaury Sechet
12233b8b2ea2SAmaury Sechet auto *IdxType = Type::getInt64Ty(T->getContext());
12243b8b2ea2SAmaury Sechet auto *Zero = ConstantInt::get(IdxType, 0);
12253b8b2ea2SAmaury Sechet
12263b8b2ea2SAmaury Sechet uint64_t Offset = 0;
12273b8b2ea2SAmaury Sechet for (uint64_t i = 0; i < NumElements; i++) {
12283b8b2ea2SAmaury Sechet Value *Indices[2] = {
12293b8b2ea2SAmaury Sechet Zero,
12303b8b2ea2SAmaury Sechet ConstantInt::get(IdxType, i),
12313b8b2ea2SAmaury Sechet };
1232bb4069e4SCraig Topper auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
12333b8b2ea2SAmaury Sechet AddrName);
1234bb4069e4SCraig Topper auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
123559f95222SGuillaume Chatelet auto EltAlign = commonAlignment(Align, Offset);
1236bb4069e4SCraig Topper Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
12370fc624f0SNikita Popov NS->setAAMetadata(SI.getAAMetadata());
12383b8b2ea2SAmaury Sechet Offset += EltSize;
12393b8b2ea2SAmaury Sechet }
12403b8b2ea2SAmaury Sechet
12413b8b2ea2SAmaury Sechet return true;
12427536460cSDavid Majnemer }
12437536460cSDavid Majnemer
1244b344ac9aSMehdi Amini return false;
1245b344ac9aSMehdi Amini }
1246b344ac9aSMehdi Amini
1247a65e2f73SChris Lattner /// equivalentAddressValues - Test if A and B will obviously have the same
1248a65e2f73SChris Lattner /// value. This includes recognizing that %t0 and %t1 will have the same
1249a65e2f73SChris Lattner /// value in code like this:
1250a65e2f73SChris Lattner /// %t0 = getelementptr \@a, 0, 3
1251a65e2f73SChris Lattner /// store i32 0, i32* %t0
1252a65e2f73SChris Lattner /// %t1 = getelementptr \@a, 0, 3
1253a65e2f73SChris Lattner /// %t2 = load i32* %t1
1254a65e2f73SChris Lattner ///
1255a65e2f73SChris Lattner static bool equivalentAddressValues(Value *A, Value *B) {
1256a65e2f73SChris Lattner // Test if the values are trivially equivalent.
1257a65e2f73SChris Lattner if (A == B) return true;
1258a65e2f73SChris Lattner
1259a65e2f73SChris Lattner // Test if the values come from identical arithmetic instructions.
1260a65e2f73SChris Lattner // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1261a65e2f73SChris Lattner // it's only used to compare two uses within the same basic block, which
1262a65e2f73SChris Lattner // means that they'll always either have the same value or one of them
1263a65e2f73SChris Lattner // will have an undefined value.
1264a65e2f73SChris Lattner if (isa<BinaryOperator>(A) ||
1265a65e2f73SChris Lattner isa<CastInst>(A) ||
1266a65e2f73SChris Lattner isa<PHINode>(A) ||
1267a65e2f73SChris Lattner isa<GetElementPtrInst>(A))
1268a65e2f73SChris Lattner if (Instruction *BI = dyn_cast<Instruction>(B))
1269a65e2f73SChris Lattner if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1270a65e2f73SChris Lattner return true;
1271a65e2f73SChris Lattner
1272a65e2f73SChris Lattner // Otherwise they may not be equivalent.
1273a65e2f73SChris Lattner return false;
1274a65e2f73SChris Lattner }
1275a65e2f73SChris Lattner
1276ec95c6ccSAlexey Bataev /// Converts store (bitcast (load (bitcast (select ...)))) to
1277ec95c6ccSAlexey Bataev /// store (load (select ...)), where select is minmax:
1278ec95c6ccSAlexey Bataev /// select ((cmp load V1, load V2), V1, V2).
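/// A rough sketch of the shape being matched (hypothetical IR; the float/i32
/// types and value names are placeholders):
///   %sel = select i1 %cmp, float* %a, float* %b   ; %cmp compares loads of %a, %b
///   %src = bitcast float* %sel to i32*
///   %ld  = load i32, i32* %src
///   %dst = bitcast float* %out to i32*
///   store i32 %ld, i32* %dst
/// The bitcasts are stripped so the load and the store use the original type.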
12792a6c8715SSebastian Neubauer static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC,
128083c15b13SAlexey Bataev StoreInst &SI) {
1281ec95c6ccSAlexey Bataev // Is the store's pointer operand a bitcast?
128283c15b13SAlexey Bataev if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1283fa0a76dbSAlexey Bataev return false;
1284ec95c6ccSAlexey Bataev // Is the stored value an integer load through a bitcast?
1285ec95c6ccSAlexey Bataev Value *LoadAddr;
1286ec95c6ccSAlexey Bataev if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1287fa0a76dbSAlexey Bataev return false;
1288ec95c6ccSAlexey Bataev auto *LI = cast<LoadInst>(SI.getValueOperand());
1289ec95c6ccSAlexey Bataev if (!LI->getType()->isIntegerTy())
1290fa0a76dbSAlexey Bataev return false;
129102f644c5SCraig Topper Type *CmpLoadTy;
129202f644c5SCraig Topper if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
129302f644c5SCraig Topper return false;
129402f644c5SCraig Topper
129523db9724SNikita Popov // Make sure the type would actually change.
129623db9724SNikita Popov // This condition can be hit with chains of bitcasts.
129723db9724SNikita Popov if (LI->getType() == CmpLoadTy)
129823db9724SNikita Popov return false;
129923db9724SNikita Popov
130002f644c5SCraig Topper // Make sure we're not changing the size of the load/store.
130102f644c5SCraig Topper const auto &DL = IC.getDataLayout();
130202f644c5SCraig Topper if (DL.getTypeStoreSizeInBits(LI->getType()) !=
130302f644c5SCraig Topper DL.getTypeStoreSizeInBits(CmpLoadTy))
1304fa0a76dbSAlexey Bataev return false;
1305ec95c6ccSAlexey Bataev
130683c15b13SAlexey Bataev if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
130783c15b13SAlexey Bataev auto *SI = dyn_cast<StoreInst>(U);
130883c15b13SAlexey Bataev return SI && SI->getPointerOperand() != LI &&
13092a6c8715SSebastian Neubauer InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
13102a6c8715SSebastian Neubauer LoadAddr &&
131183c15b13SAlexey Bataev !SI->getPointerOperand()->isSwiftError();
131283c15b13SAlexey Bataev }))
131383c15b13SAlexey Bataev return false;
131483c15b13SAlexey Bataev
131583c15b13SAlexey Bataev IC.Builder.SetInsertPoint(LI);
1316b4dd928fSNikita Popov LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
131783c15b13SAlexey Bataev // Replace all the stores with stores of the newly loaded value.
131883c15b13SAlexey Bataev for (auto *UI : LI->users()) {
131983c15b13SAlexey Bataev auto *USI = cast<StoreInst>(UI);
132083c15b13SAlexey Bataev IC.Builder.SetInsertPoint(USI);
132183c15b13SAlexey Bataev combineStoreToNewValue(IC, *USI, NewLI);
132283c15b13SAlexey Bataev }
1323ce192cedSJuneyoung Lee IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
132483c15b13SAlexey Bataev IC.eraseInstFromFunction(*LI);
1325fa0a76dbSAlexey Bataev return true;
1326ec95c6ccSAlexey Bataev }
1327ec95c6ccSAlexey Bataev
13282a6c8715SSebastian Neubauer Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1329a65e2f73SChris Lattner Value *Val = SI.getOperand(0);
1330a65e2f73SChris Lattner Value *Ptr = SI.getOperand(1);
1331a65e2f73SChris Lattner
1332816d26feSChandler Carruth // Try to canonicalize the stored type.
1333816d26feSChandler Carruth if (combineStoreToValueType(*this, SI))
13344b198802SSanjay Patel return eraseInstFromFunction(SI);
1335816d26feSChandler Carruth
1336a65e2f73SChris Lattner // Attempt to improve the alignment.
133768b2e507SCraig Topper const Align KnownAlign = getOrEnforceKnownAlignment(
133868b2e507SCraig Topper Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
1339604f4497SNikita Popov if (KnownAlign > SI.getAlign())
1340a65e2f73SChris Lattner SI.setAlignment(KnownAlign);
1341a65e2f73SChris Lattner
1342b344ac9aSMehdi Amini // Try to unpack an aggregate store into stores of its elements.
1343b344ac9aSMehdi Amini if (unpackStoreToAggregate(*this, SI))
13444b198802SSanjay Patel return eraseInstFromFunction(SI);
1345b344ac9aSMehdi Amini
1346fa0a76dbSAlexey Bataev if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1347fa0a76dbSAlexey Bataev return eraseInstFromFunction(SI);
1348ec95c6ccSAlexey Bataev
1349847e05f5SHal Finkel // Replace GEP indices if possible.
1350847e05f5SHal Finkel if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1351e6c9ab4fSNikita Popov Worklist.push(NewGEPI);
1352847e05f5SHal Finkel return &SI;
1353847e05f5SHal Finkel }
1354847e05f5SHal Finkel
1355d7a6cc85SPhilip Reames // Don't hack volatile/ordered stores.
1356d7a6cc85SPhilip Reames // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1357d7a6cc85SPhilip Reames if (!SI.isUnordered()) return nullptr;
13588bc586e7SEli Friedman
13598bc586e7SEli Friedman // If the pointer operand is an alloca with a single use, remove the store,
13608bc586e7SEli Friedman // making the alloca dead.
13618bc586e7SEli Friedman if (Ptr->hasOneUse()) {
13628bc586e7SEli Friedman if (isa<AllocaInst>(Ptr))
13634b198802SSanjay Patel return eraseInstFromFunction(SI);
13648bc586e7SEli Friedman if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
13658bc586e7SEli Friedman if (isa<AllocaInst>(GEP->getOperand(0))) {
13668bc586e7SEli Friedman if (GEP->getOperand(0)->hasOneUse())
13674b198802SSanjay Patel return eraseInstFromFunction(SI);
13688bc586e7SEli Friedman }
13698bc586e7SEli Friedman }
13708bc586e7SEli Friedman }
13718bc586e7SEli Friedman
1372d748689cSPhilip Reames // If we have a store to a location which is known constant, we can conclude
1373d748689cSPhilip Reames // that the store must be storing the constant value (else the memory
1374d748689cSPhilip Reames // wouldn't be constant), and this must be a noop.
1375d748689cSPhilip Reames if (AA->pointsToConstantMemory(Ptr))
1376d748689cSPhilip Reames return eraseInstFromFunction(SI);
1377d748689cSPhilip Reames
1378a65e2f73SChris Lattner // Do really simple DSE, to catch cases where there are several consecutive
1379a65e2f73SChris Lattner // stores to the same location, separated by a few arithmetic operations. This
1380a65e2f73SChris Lattner // situation often occurs with bitfield accesses.
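// Illustrative sketch (hypothetical IR; names are placeholders):
//   store i32 %a, i32* %p
//   ; ... a few arithmetic instructions, no other memory operations ...
//   store i32 %b, i32* %p   ; the earlier store is dead and can be removed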
13819f8aaf21SDuncan P. N. Exon Smith BasicBlock::iterator BBI(SI);
1382a65e2f73SChris Lattner for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1383a65e2f73SChris Lattner --ScanInsts) {
1384a65e2f73SChris Lattner --BBI;
13855f8c8c03SVictor Hernandez // Don't count debug info directives, lest they affect codegen, and skip
13865f8c8c03SVictor Hernandez // pointer-to-pointer bitcasts, which are NOPs.
138730bb5be3SHongtao Yu if (BBI->isDebugOrPseudoInst() ||
138819d0b47bSDuncan Sands (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1389a65e2f73SChris Lattner ScanInsts++;
1390a65e2f73SChris Lattner continue;
1391a65e2f73SChris Lattner }
1392a65e2f73SChris Lattner
1393a65e2f73SChris Lattner if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1394a65e2f73SChris Lattner // Is the previous store unordered, to the same location, and of the same type?
13954a26abc0SArthur Eubanks if (PrevSI->isUnordered() &&
13964a26abc0SArthur Eubanks equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
13974a26abc0SArthur Eubanks PrevSI->getValueOperand()->getType() ==
13984a26abc0SArthur Eubanks SI.getValueOperand()->getType()) {
1399a65e2f73SChris Lattner ++NumDeadStore;
1400522c030aSNikita Popov // Manually add back the original store to the worklist now, so it will
1401522c030aSNikita Popov // be processed after the operands of the removed store, as this may
1402522c030aSNikita Popov // expose additional DSE opportunities.
1403e6c9ab4fSNikita Popov Worklist.push(&SI);
14044b198802SSanjay Patel eraseInstFromFunction(*PrevSI);
1405522c030aSNikita Popov return nullptr;
1406a65e2f73SChris Lattner }
1407a65e2f73SChris Lattner break;
1408a65e2f73SChris Lattner }
1409a65e2f73SChris Lattner
1410a65e2f73SChris Lattner // If this is a load, we have to stop. However, if the stored value was just
1411a65e2f73SChris Lattner // loaded from the pointer we are storing to, then *this* store is dead
1412a65e2f73SChris Lattner // (X = load P; store X -> P).
1413a65e2f73SChris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1414d7a6cc85SPhilip Reames if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1415d7a6cc85SPhilip Reames assert(SI.isUnordered() && "can't eliminate ordering operation");
14164b198802SSanjay Patel return eraseInstFromFunction(SI);
1417d7a6cc85SPhilip Reames }
1418a65e2f73SChris Lattner
1419a65e2f73SChris Lattner // Otherwise, this is a load from some other location. Stores before it
1420a65e2f73SChris Lattner // may not be dead.
1421a65e2f73SChris Lattner break;
1422a65e2f73SChris Lattner }
1423a65e2f73SChris Lattner
1424679bc32cSSanjoy Das // Don't skip over loads, throws or things that can modify memory.
1425679bc32cSSanjoy Das if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1426a65e2f73SChris Lattner break;
1427a65e2f73SChris Lattner }
1428a65e2f73SChris Lattner
1429a65e2f73SChris Lattner // store X, null -> turns into 'unreachable' in SimplifyCFG
14302dd9835fSAnna Thomas // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
14312dd9835fSAnna Thomas if (canSimplifyNullStoreOrGEP(SI)) {
1432ce192cedSJuneyoung Lee if (!isa<PoisonValue>(Val))
1433ce192cedSJuneyoung Lee return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1434f40110f4SCraig Topper return nullptr; // Do not modify these!
1435a65e2f73SChris Lattner }
1436a65e2f73SChris Lattner
1437a65e2f73SChris Lattner // store undef, Ptr -> noop
14381881711fSNikita Popov // FIXME: This is technically incorrect because it might overwrite a poison
14391881711fSNikita Popov // value. Change to PoisonValue once #52930 is resolved.
1440a65e2f73SChris Lattner if (isa<UndefValue>(Val))
14414b198802SSanjay Patel return eraseInstFromFunction(SI);
1442a65e2f73SChris Lattner
1443f40110f4SCraig Topper return nullptr;
1444a65e2f73SChris Lattner }
1445a65e2f73SChris Lattner
14464a12aa97SSanjay Patel /// Try to transform:
1447a65e2f73SChris Lattner /// if () { *P = v1; } else { *P = v2 }
14484a12aa97SSanjay Patel /// or:
1449a65e2f73SChris Lattner /// *P = v1; if () { *P = v2; }
1450a65e2f73SChris Lattner /// into a phi node with a store in the successor.
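/// Sketch of the merged result for the if/then/else form (hypothetical IR;
/// block and value names are placeholders):
///   merge:
///     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///     store i32 %storemerge, i32* %P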
14512a6c8715SSebastian Neubauer bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1452e2b75cafSRoman Lebedev if (!SI.isUnordered())
1453e2b75cafSRoman Lebedev return false; // This code has not been audited for volatile/ordered case.
14545f0e3694SPhilip Reames
14554a12aa97SSanjay Patel // Check if the successor block has exactly 2 incoming edges.
1456a65e2f73SChris Lattner BasicBlock *StoreBB = SI.getParent();
1457a65e2f73SChris Lattner BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
14584de31bbaSVedant Kumar if (!DestBB->hasNPredecessors(2))
1459a65e2f73SChris Lattner return false;
1460a65e2f73SChris Lattner
14614a12aa97SSanjay Patel // Capture the other block (the block that doesn't contain our store).
14624a12aa97SSanjay Patel pred_iterator PredIter = pred_begin(DestBB);
14634a12aa97SSanjay Patel if (*PredIter == StoreBB)
14644a12aa97SSanjay Patel ++PredIter;
14654a12aa97SSanjay Patel BasicBlock *OtherBB = *PredIter;
1466a65e2f73SChris Lattner
14674a12aa97SSanjay Patel // Bail out if the relevant blocks aren't all distinct. This can happen,
14684a12aa97SSanjay Patel // for example, if SI is in an infinite loop.
1469a65e2f73SChris Lattner if (StoreBB == DestBB || OtherBB == DestBB)
1470a65e2f73SChris Lattner return false;
1471a65e2f73SChris Lattner
1472a65e2f73SChris Lattner // Verify that the other block ends in a branch and is not otherwise empty.
14739f8aaf21SDuncan P. N. Exon Smith BasicBlock::iterator BBI(OtherBB->getTerminator());
1474a65e2f73SChris Lattner BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1475a65e2f73SChris Lattner if (!OtherBr || BBI == OtherBB->begin())
1476a65e2f73SChris Lattner return false;
1477a65e2f73SChris Lattner
1478a65e2f73SChris Lattner // If the other block ends in an unconditional branch, check for the 'if then
14794a12aa97SSanjay Patel // else' case. There is an instruction before the branch.
1480f40110f4SCraig Topper StoreInst *OtherStore = nullptr;
1481a65e2f73SChris Lattner if (OtherBr->isUnconditional()) {
1482a65e2f73SChris Lattner --BBI;
1483098a0d8fSHongtao Yu // Skip over debugging info and pseudo probes.
1484098a0d8fSHongtao Yu while (BBI->isDebugOrPseudoInst() ||
148519d0b47bSDuncan Sands (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1486a65e2f73SChris Lattner if (BBI==OtherBB->begin())
1487a65e2f73SChris Lattner return false;
1488a65e2f73SChris Lattner --BBI;
1489a65e2f73SChris Lattner }
14908bc586e7SEli Friedman // If this isn't a store, isn't a store to the same location, or is not the
14918bc586e7SEli Friedman // right kind of store, bail out.
1492a65e2f73SChris Lattner OtherStore = dyn_cast<StoreInst>(BBI);
1493a65e2f73SChris Lattner if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
14948bc586e7SEli Friedman !SI.isSameOperationAs(OtherStore))
1495a65e2f73SChris Lattner return false;
1496a65e2f73SChris Lattner } else {
1497a65e2f73SChris Lattner // Otherwise, the other block ended with a conditional branch. If one of the
1498a65e2f73SChris Lattner // destinations is StoreBB, then we have the if/then case.
1499a65e2f73SChris Lattner if (OtherBr->getSuccessor(0) != StoreBB &&
1500a65e2f73SChris Lattner OtherBr->getSuccessor(1) != StoreBB)
1501a65e2f73SChris Lattner return false;
1502a65e2f73SChris Lattner
1503a65e2f73SChris Lattner // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1504a65e2f73SChris Lattner // if/then triangle. See if there is a store to the same ptr as SI that
1505a65e2f73SChris Lattner // lives in OtherBB.
1506a65e2f73SChris Lattner for (;; --BBI) {
1507a65e2f73SChris Lattner // Check to see if we find the matching store.
1508a65e2f73SChris Lattner if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1509a65e2f73SChris Lattner if (OtherStore->getOperand(1) != SI.getOperand(1) ||
15108bc586e7SEli Friedman !SI.isSameOperationAs(OtherStore))
1511a65e2f73SChris Lattner return false;
1512a65e2f73SChris Lattner break;
1513a65e2f73SChris Lattner }
1514a65e2f73SChris Lattner // If we find something that may be using or overwriting the stored
15154a12aa97SSanjay Patel // value, or if we run out of instructions, we can't do the transform.
1516679bc32cSSanjoy Das if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1517679bc32cSSanjoy Das BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1518a65e2f73SChris Lattner return false;
1519a65e2f73SChris Lattner }
1520a65e2f73SChris Lattner
15214a12aa97SSanjay Patel // In order to eliminate the store in OtherBr, we have to make sure nothing
15224a12aa97SSanjay Patel // reads or overwrites the stored value in StoreBB.
1523a65e2f73SChris Lattner for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1524a65e2f73SChris Lattner // FIXME: This should really be AA driven.
1525679bc32cSSanjoy Das if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1526a65e2f73SChris Lattner return false;
1527a65e2f73SChris Lattner }
1528a65e2f73SChris Lattner }
1529a65e2f73SChris Lattner
1530a65e2f73SChris Lattner // Insert a PHI node now if we need it.
1531a65e2f73SChris Lattner Value *MergedVal = OtherStore->getOperand(0);
1532238533ecSVedant Kumar // The debug locations of the original instructions might differ. Merge them.
1533238533ecSVedant Kumar DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1534238533ecSVedant Kumar OtherStore->getDebugLoc());
1535a65e2f73SChris Lattner if (MergedVal != SI.getOperand(0)) {
153652131344SJay Foad PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1537a65e2f73SChris Lattner PN->addIncoming(SI.getOperand(0), SI.getParent());
1538a65e2f73SChris Lattner PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1539a65e2f73SChris Lattner MergedVal = InsertNewInstBefore(PN, DestBB->front());
1540238533ecSVedant Kumar PN->setDebugLoc(MergedLoc);
1541a65e2f73SChris Lattner }
1542a65e2f73SChris Lattner
15434a12aa97SSanjay Patel // Advance to a place where it is safe to insert the new store and insert it.
15448ddfc09eSBill Wendling BBI = DestBB->getFirstInsertionPt();
154511aa3707SEli Friedman StoreInst *NewSI =
154611aa3707SEli Friedman new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
15474a12aa97SSanjay Patel SI.getOrdering(), SI.getSyncScopeID());
154835211c60SEli Friedman InsertNewInstBefore(NewSI, *BBI);
1549238533ecSVedant Kumar NewSI->setDebugLoc(MergedLoc);
1550a65e2f73SChris Lattner
1551cc39b675SHal Finkel // If the two stores had AA tags, merge them.
15520fc624f0SNikita Popov AAMDNodes AATags = SI.getAAMetadata();
15530fc624f0SNikita Popov if (AATags)
15540fc624f0SNikita Popov NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1555eeefe1bcSChris Lattner
1556a65e2f73SChris Lattner // Nuke the old stores.
15574b198802SSanjay Patel eraseInstFromFunction(SI);
15584b198802SSanjay Patel eraseInstFromFunction(*OtherStore);
1559a65e2f73SChris Lattner return true;
1560a65e2f73SChris Lattner }
1561