1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visitCall, visitInvoke, and visitCallBr functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLFunctionalExtras.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/Analysis/AssumeBundleQueries.h"
26 #include "llvm/Analysis/AssumptionCache.h"
27 #include "llvm/Analysis/InstructionSimplify.h"
28 #include "llvm/Analysis/Loads.h"
29 #include "llvm/Analysis/MemoryBuiltins.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/Analysis/VectorUtils.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalVariable.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/IntrinsicsAArch64.h"
47 #include "llvm/IR/IntrinsicsAMDGPU.h"
48 #include "llvm/IR/IntrinsicsARM.h"
49 #include "llvm/IR/IntrinsicsHexagon.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/PatternMatch.h"
53 #include "llvm/IR/Statepoint.h"
54 #include "llvm/IR/Type.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/IR/ValueHandle.h"
58 #include "llvm/Support/AtomicOrdering.h"
59 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/CommandLine.h"
61 #include "llvm/Support/Compiler.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/ErrorHandling.h"
64 #include "llvm/Support/KnownBits.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/raw_ostream.h"
67 #include "llvm/Transforms/InstCombine/InstCombiner.h"
68 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
69 #include "llvm/Transforms/Utils/Local.h"
70 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
71 #include <algorithm>
72 #include <cassert>
73 #include <cstdint>
74 #include <utility>
75 #include <vector>
76 
77 #define DEBUG_TYPE "instcombine"
78 #include "llvm/Transforms/Utils/InstructionWorklist.h"
79 
80 using namespace llvm;
81 using namespace PatternMatch;
82 
83 STATISTIC(NumSimplified, "Number of library calls simplified");
84 
85 static cl::opt<unsigned> GuardWideningWindow(
86     "instcombine-guard-widening-window",
87     cl::init(3),
88     cl::desc("How wide an instruction window to bypass looking for "
89              "another guard"));
90 
91 namespace llvm {
92 /// enable preservation of attributes in assume like:
93 /// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
94 extern cl::opt<bool> EnableKnowledgeRetention;
95 } // namespace llvm
96 
97 /// Return the specified type promoted as it would be to pass though a va_arg
98 /// area.
99 static Type *getPromotedType(Type *Ty) {
100   if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
101     if (ITy->getBitWidth() < 32)
102       return Type::getInt32Ty(Ty->getContext());
103   }
104   return Ty;
105 }
106 
107 Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
108   Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
109   MaybeAlign CopyDstAlign = MI->getDestAlign();
110   if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
111     MI->setDestAlignment(DstAlign);
112     return MI;
113   }
114 
115   Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
116   MaybeAlign CopySrcAlign = MI->getSourceAlign();
117   if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
118     MI->setSourceAlignment(SrcAlign);
119     return MI;
120   }
121 
122   // If we have a store to a location which is known constant, we can conclude
123   // that the store must be storing the constant value (else the memory
124   // wouldn't be constant), and this must be a noop.
125   if (AA->pointsToConstantMemory(MI->getDest())) {
126     // Set the size of the copy to 0, it will be deleted on the next iteration.
127     MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
128     return MI;
129   }
130 
131   // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
132   // load/store.
133   ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
134   if (!MemOpLength) return nullptr;
135 
136   // Source and destination pointer types are always "i8*" for intrinsic.  See
137   // if the size is something we can handle with a single primitive load/store.
138   // A single load+store correctly handles overlapping memory in the memmove
139   // case.
140   uint64_t Size = MemOpLength->getLimitedValue();
141   assert(Size && "0-sized memory transferring should be removed already.");
142 
143   if (Size > 8 || (Size&(Size-1)))
144     return nullptr;  // If not 1/2/4/8 bytes, exit.
145 
146   // If it is an atomic and alignment is less than the size then we will
147   // introduce the unaligned memory access which will be later transformed
148   // into libcall in CodeGen. This is not evident performance gain so disable
149   // it now.
150   if (isa<AtomicMemTransferInst>(MI))
151     if (*CopyDstAlign < Size || *CopySrcAlign < Size)
152       return nullptr;
153 
154   // Use an integer load+store unless we can find something better.
155   unsigned SrcAddrSp =
156     cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
157   unsigned DstAddrSp =
158     cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
159 
160   IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
161   Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
162   Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
163 
164   // If the memcpy has metadata describing the members, see if we can get the
165   // TBAA tag describing our copy.
166   MDNode *CopyMD = nullptr;
167   if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
168     CopyMD = M;
169   } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
170     if (M->getNumOperands() == 3 && M->getOperand(0) &&
171         mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
172         mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
173         M->getOperand(1) &&
174         mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
175         mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
176         Size &&
177         M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
178       CopyMD = cast<MDNode>(M->getOperand(2));
179   }
180 
181   Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
182   Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
183   LoadInst *L = Builder.CreateLoad(IntType, Src);
184   // Alignment from the mem intrinsic will be better, so use it.
185   L->setAlignment(*CopySrcAlign);
186   if (CopyMD)
187     L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
188   MDNode *LoopMemParallelMD =
189     MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
190   if (LoopMemParallelMD)
191     L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
192   MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
193   if (AccessGroupMD)
194     L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
195 
196   StoreInst *S = Builder.CreateStore(L, Dest);
197   // Alignment from the mem intrinsic will be better, so use it.
198   S->setAlignment(*CopyDstAlign);
199   if (CopyMD)
200     S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
201   if (LoopMemParallelMD)
202     S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
203   if (AccessGroupMD)
204     S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
205 
206   if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
207     // non-atomics can be volatile
208     L->setVolatile(MT->isVolatile());
209     S->setVolatile(MT->isVolatile());
210   }
211   if (isa<AtomicMemTransferInst>(MI)) {
212     // atomics have to be unordered
213     L->setOrdering(AtomicOrdering::Unordered);
214     S->setOrdering(AtomicOrdering::Unordered);
215   }
216 
217   // Set the size of the copy to 0, it will be deleted on the next iteration.
218   MI->setLength(Constant::getNullValue(MemOpLength->getType()));
219   return MI;
220 }
221 
222 Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
223   const Align KnownAlignment =
224       getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
225   MaybeAlign MemSetAlign = MI->getDestAlign();
226   if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
227     MI->setDestAlignment(KnownAlignment);
228     return MI;
229   }
230 
231   // If we have a store to a location which is known constant, we can conclude
232   // that the store must be storing the constant value (else the memory
233   // wouldn't be constant), and this must be a noop.
234   if (AA->pointsToConstantMemory(MI->getDest())) {
235     // Set the size of the copy to 0, it will be deleted on the next iteration.
236     MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
237     return MI;
238   }
239 
240   // Extract the length and alignment and fill if they are constant.
241   ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
242   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
243   if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
244     return nullptr;
245   const uint64_t Len = LenC->getLimitedValue();
246   assert(Len && "0-sized memory setting should be removed already.");
247   const Align Alignment = assumeAligned(MI->getDestAlignment());
248 
249   // If it is an atomic and alignment is less than the size then we will
250   // introduce the unaligned memory access which will be later transformed
251   // into libcall in CodeGen. This is not evident performance gain so disable
252   // it now.
253   if (isa<AtomicMemSetInst>(MI))
254     if (Alignment < Len)
255       return nullptr;
256 
257   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
258   if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
259     Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
260 
261     Value *Dest = MI->getDest();
262     unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
263     Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
264     Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
265 
266     // Extract the fill value and store.
267     uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
268     StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
269                                        MI->isVolatile());
270     S->setAlignment(Alignment);
271     if (isa<AtomicMemSetInst>(MI))
272       S->setOrdering(AtomicOrdering::Unordered);
273 
274     // Set the size of the copy to 0, it will be deleted on the next iteration.
275     MI->setLength(Constant::getNullValue(LenC->getType()));
276     return MI;
277   }
278 
279   return nullptr;
280 }
281 
282 // TODO, Obvious Missing Transforms:
283 // * Narrow width by halfs excluding zero/undef lanes
284 Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
285   Value *LoadPtr = II.getArgOperand(0);
286   const Align Alignment =
287       cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
288 
289   // If the mask is all ones or undefs, this is a plain vector load of the 1st
290   // argument.
291   if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
292     LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
293                                             "unmaskedload");
294     L->copyMetadata(II);
295     return L;
296   }
297 
298   // If we can unconditionally load from this address, replace with a
299   // load/select idiom. TODO: use DT for context sensitive query
300   if (isDereferenceablePointer(LoadPtr, II.getType(),
301                                II.getModule()->getDataLayout(), &II, nullptr)) {
302     LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
303                                              "unmaskedload");
304     LI->copyMetadata(II);
305     return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
306   }
307 
308   return nullptr;
309 }
310 
311 // TODO, Obvious Missing Transforms:
312 // * Single constant active lane -> store
313 // * Narrow width by halfs excluding zero/undef lanes
314 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
315   auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
316   if (!ConstMask)
317     return nullptr;
318 
319   // If the mask is all zeros, this instruction does nothing.
320   if (ConstMask->isNullValue())
321     return eraseInstFromFunction(II);
322 
323   // If the mask is all ones, this is a plain vector store of the 1st argument.
324   if (ConstMask->isAllOnesValue()) {
325     Value *StorePtr = II.getArgOperand(1);
326     Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
327     StoreInst *S =
328         new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
329     S->copyMetadata(II);
330     return S;
331   }
332 
333   if (isa<ScalableVectorType>(ConstMask->getType()))
334     return nullptr;
335 
336   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
337   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
338   APInt UndefElts(DemandedElts.getBitWidth(), 0);
339   if (Value *V =
340           SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
341     return replaceOperand(II, 0, V);
342 
343   return nullptr;
344 }
345 
346 // TODO, Obvious Missing Transforms:
347 // * Single constant active lane load -> load
348 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
349 // * Adjacent vector addresses -> masked.load
350 // * Narrow width by halfs excluding zero/undef lanes
351 // * Vector incrementing address -> vector masked load
352 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
353   auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
354   if (!ConstMask)
355     return nullptr;
356 
357   // Vector splat address w/known mask -> scalar load
358   // Fold the gather to load the source vector first lane
359   // because it is reloading the same value each time
360   if (ConstMask->isAllOnesValue())
361     if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
362       auto *VecTy = cast<VectorType>(II.getType());
363       const Align Alignment =
364           cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
365       LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
366                                               Alignment, "load.scalar");
367       Value *Shuf =
368           Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
369       return replaceInstUsesWith(II, cast<Instruction>(Shuf));
370     }
371 
372   return nullptr;
373 }
374 
375 // TODO, Obvious Missing Transforms:
376 // * Single constant active lane -> store
377 // * Adjacent vector addresses -> masked.store
378 // * Narrow store width by halfs excluding zero/undef lanes
379 // * Vector incrementing address -> vector masked store
380 Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
381   auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
382   if (!ConstMask)
383     return nullptr;
384 
385   // If the mask is all zeros, a scatter does nothing.
386   if (ConstMask->isNullValue())
387     return eraseInstFromFunction(II);
388 
389   // Vector splat address -> scalar store
390   if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
391     // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
392     if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
393       Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
394       StoreInst *S =
395           new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
396       S->copyMetadata(II);
397       return S;
398     }
399     // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
400     // lastlane), ptr
401     if (ConstMask->isAllOnesValue()) {
402       Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
403       VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
404       ElementCount VF = WideLoadTy->getElementCount();
405       Constant *EC =
406           ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
407       Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
408       Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
409       Value *Extract =
410           Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
411       StoreInst *S =
412           new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
413       S->copyMetadata(II);
414       return S;
415     }
416   }
417   if (isa<ScalableVectorType>(ConstMask->getType()))
418     return nullptr;
419 
420   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
421   APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
422   APInt UndefElts(DemandedElts.getBitWidth(), 0);
423   if (Value *V =
424           SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
425     return replaceOperand(II, 0, V);
426   if (Value *V =
427           SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
428     return replaceOperand(II, 1, V);
429 
430   return nullptr;
431 }
432 
433 /// This function transforms launder.invariant.group and strip.invariant.group
434 /// like:
435 /// launder(launder(%x)) -> launder(%x)       (the result is not the argument)
436 /// launder(strip(%x)) -> launder(%x)
437 /// strip(strip(%x)) -> strip(%x)             (the result is not the argument)
438 /// strip(launder(%x)) -> strip(%x)
439 /// This is legal because it preserves the most recent information about
440 /// the presence or absence of invariant.group.
441 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
442                                                     InstCombinerImpl &IC) {
443   auto *Arg = II.getArgOperand(0);
444   auto *StrippedArg = Arg->stripPointerCasts();
445   auto *StrippedInvariantGroupsArg = StrippedArg;
446   while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
447     if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
448         Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
449       break;
450     StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
451   }
452   if (StrippedArg == StrippedInvariantGroupsArg)
453     return nullptr; // No launders/strips to remove.
454 
455   Value *Result = nullptr;
456 
457   if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
458     Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
459   else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
460     Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
461   else
462     llvm_unreachable(
463         "simplifyInvariantGroupIntrinsic only handles launder and strip");
464   if (Result->getType()->getPointerAddressSpace() !=
465       II.getType()->getPointerAddressSpace())
466     Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
467   if (Result->getType() != II.getType())
468     Result = IC.Builder.CreateBitCast(Result, II.getType());
469 
470   return cast<Instruction>(Result);
471 }
472 
473 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
474   assert((II.getIntrinsicID() == Intrinsic::cttz ||
475           II.getIntrinsicID() == Intrinsic::ctlz) &&
476          "Expected cttz or ctlz intrinsic");
477   bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
478   Value *Op0 = II.getArgOperand(0);
479   Value *Op1 = II.getArgOperand(1);
480   Value *X;
481   // ctlz(bitreverse(x)) -> cttz(x)
482   // cttz(bitreverse(x)) -> ctlz(x)
483   if (match(Op0, m_BitReverse(m_Value(X)))) {
484     Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
485     Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
486     return CallInst::Create(F, {X, II.getArgOperand(1)});
487   }
488 
489   if (II.getType()->isIntOrIntVectorTy(1)) {
490     // ctlz/cttz i1 Op0 --> not Op0
491     if (match(Op1, m_Zero()))
492       return BinaryOperator::CreateNot(Op0);
493     // If zero is poison, then the input can be assumed to be "true", so the
494     // instruction simplifies to "false".
495     assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
496     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
497   }
498 
499   // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
500   if (auto *Sel = dyn_cast<SelectInst>(Op0))
501     if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
502       return R;
503 
504   if (IsTZ) {
505     // cttz(-x) -> cttz(x)
506     if (match(Op0, m_Neg(m_Value(X))))
507       return IC.replaceOperand(II, 0, X);
508 
509     // cttz(sext(x)) -> cttz(zext(x))
510     if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
511       auto *Zext = IC.Builder.CreateZExt(X, II.getType());
512       auto *CttzZext =
513           IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
514       return IC.replaceInstUsesWith(II, CttzZext);
515     }
516 
517     // Zext doesn't change the number of trailing zeros, so narrow:
518     // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
519     if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
520       auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
521                                                     IC.Builder.getTrue());
522       auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
523       return IC.replaceInstUsesWith(II, ZextCttz);
524     }
525 
526     // cttz(abs(x)) -> cttz(x)
527     // cttz(nabs(x)) -> cttz(x)
528     Value *Y;
529     SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
530     if (SPF == SPF_ABS || SPF == SPF_NABS)
531       return IC.replaceOperand(II, 0, X);
532 
533     if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
534       return IC.replaceOperand(II, 0, X);
535   }
536 
537   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
538 
539   // Create a mask for bits above (ctlz) or below (cttz) the first known one.
540   unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
541                                 : Known.countMaxLeadingZeros();
542   unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
543                                 : Known.countMinLeadingZeros();
544 
545   // If all bits above (ctlz) or below (cttz) the first known one are known
546   // zero, this value is constant.
547   // FIXME: This should be in InstSimplify because we're replacing an
548   // instruction with a constant.
549   if (PossibleZeros == DefiniteZeros) {
550     auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
551     return IC.replaceInstUsesWith(II, C);
552   }
553 
554   // If the input to cttz/ctlz is known to be non-zero,
555   // then change the 'ZeroIsPoison' parameter to 'true'
556   // because we know the zero behavior can't affect the result.
557   if (!Known.One.isZero() ||
558       isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
559                      &IC.getDominatorTree())) {
560     if (!match(II.getArgOperand(1), m_One()))
561       return IC.replaceOperand(II, 1, IC.Builder.getTrue());
562   }
563 
564   // Add range metadata since known bits can't completely reflect what we know.
565   // TODO: Handle splat vectors.
566   auto *IT = dyn_cast<IntegerType>(Op0->getType());
567   if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
568     Metadata *LowAndHigh[] = {
569         ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
570         ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
571     II.setMetadata(LLVMContext::MD_range,
572                    MDNode::get(II.getContext(), LowAndHigh));
573     return &II;
574   }
575 
576   return nullptr;
577 }
578 
579 static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
580   assert(II.getIntrinsicID() == Intrinsic::ctpop &&
581          "Expected ctpop intrinsic");
582   Type *Ty = II.getType();
583   unsigned BitWidth = Ty->getScalarSizeInBits();
584   Value *Op0 = II.getArgOperand(0);
585   Value *X, *Y;
586 
587   // ctpop(bitreverse(x)) -> ctpop(x)
588   // ctpop(bswap(x)) -> ctpop(x)
589   if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
590     return IC.replaceOperand(II, 0, X);
591 
592   // ctpop(rot(x)) -> ctpop(x)
593   if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
594        match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
595       X == Y)
596     return IC.replaceOperand(II, 0, X);
597 
598   // ctpop(x | -x) -> bitwidth - cttz(x, false)
599   if (Op0->hasOneUse() &&
600       match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
601     Function *F =
602         Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
603     auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
604     auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
605     return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
606   }
607 
608   // ctpop(~x & (x - 1)) -> cttz(x, false)
609   if (match(Op0,
610             m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
611     Function *F =
612         Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
613     return CallInst::Create(F, {X, IC.Builder.getFalse()});
614   }
615 
616   // Zext doesn't change the number of set bits, so narrow:
617   // ctpop (zext X) --> zext (ctpop X)
618   if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
619     Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
620     return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
621   }
622 
623   // If the operand is a select with constant arm(s), try to hoist ctpop.
624   if (auto *Sel = dyn_cast<SelectInst>(Op0))
625     if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
626       return R;
627 
628   KnownBits Known(BitWidth);
629   IC.computeKnownBits(Op0, Known, 0, &II);
630 
631   // If all bits are zero except for exactly one fixed bit, then the result
632   // must be 0 or 1, and we can get that answer by shifting to LSB:
633   // ctpop (X & 32) --> (X & 32) >> 5
634   if ((~Known.Zero).isPowerOf2())
635     return BinaryOperator::CreateLShr(
636         Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
637 
638   // FIXME: Try to simplify vectors of integers.
639   auto *IT = dyn_cast<IntegerType>(Ty);
640   if (!IT)
641     return nullptr;
642 
643   // Add range metadata since known bits can't completely reflect what we know.
644   unsigned MinCount = Known.countMinPopulation();
645   unsigned MaxCount = Known.countMaxPopulation();
646   if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
647     Metadata *LowAndHigh[] = {
648         ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
649         ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
650     II.setMetadata(LLVMContext::MD_range,
651                    MDNode::get(II.getContext(), LowAndHigh));
652     return &II;
653   }
654 
655   return nullptr;
656 }
657 
658 /// Convert a table lookup to shufflevector if the mask is constant.
659 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
660 /// which case we could lower the shufflevector with rev64 instructions
661 /// as it's actually a byte reverse.
662 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
663                                InstCombiner::BuilderTy &Builder) {
664   // Bail out if the mask is not a constant.
665   auto *C = dyn_cast<Constant>(II.getArgOperand(1));
666   if (!C)
667     return nullptr;
668 
669   auto *VecTy = cast<FixedVectorType>(II.getType());
670   unsigned NumElts = VecTy->getNumElements();
671 
672   // Only perform this transformation for <8 x i8> vector types.
673   if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
674     return nullptr;
675 
676   int Indexes[8];
677 
678   for (unsigned I = 0; I < NumElts; ++I) {
679     Constant *COp = C->getAggregateElement(I);
680 
681     if (!COp || !isa<ConstantInt>(COp))
682       return nullptr;
683 
684     Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
685 
686     // Make sure the mask indices are in range.
687     if ((unsigned)Indexes[I] >= NumElts)
688       return nullptr;
689   }
690 
691   auto *V1 = II.getArgOperand(0);
692   auto *V2 = Constant::getNullValue(V1->getType());
693   return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
694 }
695 
696 // Returns true iff the 2 intrinsics have the same operands, limiting the
697 // comparison to the first NumOperands.
698 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
699                              unsigned NumOperands) {
700   assert(I.arg_size() >= NumOperands && "Not enough operands");
701   assert(E.arg_size() >= NumOperands && "Not enough operands");
702   for (unsigned i = 0; i < NumOperands; i++)
703     if (I.getArgOperand(i) != E.getArgOperand(i))
704       return false;
705   return true;
706 }
707 
708 // Remove trivially empty start/end intrinsic ranges, i.e. a start
709 // immediately followed by an end (ignoring debuginfo or other
710 // start/end intrinsics in between). As this handles only the most trivial
711 // cases, tracking the nesting level is not needed:
712 //
713 //   call @llvm.foo.start(i1 0)
714 //   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
715 //   call @llvm.foo.end(i1 0)
716 //   call @llvm.foo.end(i1 0) ; &I
717 static bool
718 removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
719                           std::function<bool(const IntrinsicInst &)> IsStart) {
720   // We start from the end intrinsic and scan backwards, so that InstCombine
721   // has already processed (and potentially removed) all the instructions
722   // before the end intrinsic.
723   BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
724   for (; BI != BE; ++BI) {
725     if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
726       if (I->isDebugOrPseudoInst() ||
727           I->getIntrinsicID() == EndI.getIntrinsicID())
728         continue;
729       if (IsStart(*I)) {
730         if (haveSameOperands(EndI, *I, EndI.arg_size())) {
731           IC.eraseInstFromFunction(*I);
732           IC.eraseInstFromFunction(EndI);
733           return true;
734         }
735         // Skip start intrinsics that don't pair with this end intrinsic.
736         continue;
737       }
738     }
739     break;
740   }
741 
742   return false;
743 }
744 
745 Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
746   removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
747     return I.getIntrinsicID() == Intrinsic::vastart ||
748            I.getIntrinsicID() == Intrinsic::vacopy;
749   });
750   return nullptr;
751 }
752 
753 static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
754   assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
755   Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
756   if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
757     Call.setArgOperand(0, Arg1);
758     Call.setArgOperand(1, Arg0);
759     return &Call;
760   }
761   return nullptr;
762 }
763 
764 /// Creates a result tuple for an overflow intrinsic \p II with a given
765 /// \p Result and a constant \p Overflow value.
766 static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
767                                         Constant *Overflow) {
768   Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
769   StructType *ST = cast<StructType>(II->getType());
770   Constant *Struct = ConstantStruct::get(ST, V);
771   return InsertValueInst::Create(Struct, Result, 0);
772 }
773 
774 Instruction *
775 InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
776   WithOverflowInst *WO = cast<WithOverflowInst>(II);
777   Value *OperationResult = nullptr;
778   Constant *OverflowResult = nullptr;
779   if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
780                             WO->getRHS(), *WO, OperationResult, OverflowResult))
781     return createOverflowTuple(WO, OperationResult, OverflowResult);
782   return nullptr;
783 }
784 
785 static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
786                                    const DataLayout &DL, AssumptionCache *AC,
787                                    DominatorTree *DT) {
788   KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
789   if (Known.isNonNegative())
790     return false;
791   if (Known.isNegative())
792     return true;
793 
794   return isImpliedByDomCondition(
795       ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
796 }
797 
798 /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
799 /// can trigger other combines.
800 static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
801                                        InstCombiner::BuilderTy &Builder) {
802   Intrinsic::ID MinMaxID = II->getIntrinsicID();
803   assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
804           MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
805          "Expected a min or max intrinsic");
806 
807   // TODO: Match vectors with undef elements, but undef may not propagate.
808   Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
809   Value *X;
810   const APInt *C0, *C1;
811   if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
812       !match(Op1, m_APInt(C1)))
813     return nullptr;
814 
815   // Check for necessary no-wrap and overflow constraints.
816   bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
817   auto *Add = cast<BinaryOperator>(Op0);
818   if ((IsSigned && !Add->hasNoSignedWrap()) ||
819       (!IsSigned && !Add->hasNoUnsignedWrap()))
820     return nullptr;
821 
822   // If the constant difference overflows, then instsimplify should reduce the
823   // min/max to the add or C1.
824   bool Overflow;
825   APInt CDiff =
826       IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
827   assert(!Overflow && "Expected simplify of min/max");
828 
829   // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
830   // Note: the "mismatched" no-overflow setting does not propagate.
831   Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
832   Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
833   return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
834                   : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
835 }
836 /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
837 Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
838   Type *Ty = MinMax1.getType();
839 
840   // We are looking for a tree of:
841   // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
842   // Where the min and max could be reversed
843   Instruction *MinMax2;
844   BinaryOperator *AddSub;
845   const APInt *MinValue, *MaxValue;
846   if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
847     if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
848       return nullptr;
849   } else if (match(&MinMax1,
850                    m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
851     if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
852       return nullptr;
853   } else
854     return nullptr;
855 
856   // Check that the constants clamp a saturate, and that the new type would be
857   // sensible to convert to.
858   if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
859     return nullptr;
860   // In what bitwidth can this be treated as saturating arithmetics?
861   unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
862   // FIXME: This isn't quite right for vectors, but using the scalar type is a
863   // good first approximation for what should be done there.
864   if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
865     return nullptr;
866 
867   // Also make sure that the inner min/max and the add/sub have one use.
868   if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
869     return nullptr;
870 
871   // Create the new type (which can be a vector type)
872   Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
873 
874   Intrinsic::ID IntrinsicID;
875   if (AddSub->getOpcode() == Instruction::Add)
876     IntrinsicID = Intrinsic::sadd_sat;
877   else if (AddSub->getOpcode() == Instruction::Sub)
878     IntrinsicID = Intrinsic::ssub_sat;
879   else
880     return nullptr;
881 
882   // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
883   // is usually achieved via a sext from a smaller type.
884   if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
885           NewBitWidth ||
886       ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
887     return nullptr;
888 
889   // Finally create and return the sat intrinsic, truncated to the new type
890   Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
891   Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
892   Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
893   Value *Sat = Builder.CreateCall(F, {AT, BT});
894   return CastInst::Create(Instruction::SExt, Sat, Ty);
895 }
896 
897 
898 /// If we have a clamp pattern like max (min X, 42), 41 -- where the output
899 /// can only be one of two possible constant values -- turn that into a select
900 /// of constants.
901 static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
902                                         InstCombiner::BuilderTy &Builder) {
903   Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
904   Value *X;
905   const APInt *C0, *C1;
906   if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
907     return nullptr;
908 
909   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
910   switch (II->getIntrinsicID()) {
911   case Intrinsic::smax:
912     if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
913       Pred = ICmpInst::ICMP_SGT;
914     break;
915   case Intrinsic::smin:
916     if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
917       Pred = ICmpInst::ICMP_SLT;
918     break;
919   case Intrinsic::umax:
920     if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
921       Pred = ICmpInst::ICMP_UGT;
922     break;
923   case Intrinsic::umin:
924     if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
925       Pred = ICmpInst::ICMP_ULT;
926     break;
927   default:
928     llvm_unreachable("Expected min/max intrinsic");
929   }
930   if (Pred == CmpInst::BAD_ICMP_PREDICATE)
931     return nullptr;
932 
933   // max (min X, 42), 41 --> X > 41 ? 42 : 41
934   // min (max X, 42), 43 --> X < 43 ? 42 : 43
935   Value *Cmp = Builder.CreateICmp(Pred, X, I1);
936   return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
937 }
938 
939 /// If this min/max has a constant operand and an operand that is a matching
940 /// min/max with a constant operand, constant-fold the 2 constant operands.
941 static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
942   Intrinsic::ID MinMaxID = II->getIntrinsicID();
943   auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
944   if (!LHS || LHS->getIntrinsicID() != MinMaxID)
945     return nullptr;
946 
947   Constant *C0, *C1;
948   if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
949       !match(II->getArgOperand(1), m_ImmConstant(C1)))
950     return nullptr;
951 
952   // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
953   ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
954   Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
955   Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
956 
957   Module *Mod = II->getModule();
958   Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
959   return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
960 }
961 
962 /// If this min/max has a matching min/max operand with a constant, try to push
963 /// the constant operand into this instruction. This can enable more folds.
964 static Instruction *
965 reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
966                                        InstCombiner::BuilderTy &Builder) {
967   // Match and capture a min/max operand candidate.
968   Value *X, *Y;
969   Constant *C;
970   Instruction *Inner;
971   if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
972                                   m_Instruction(Inner),
973                                   m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
974                               m_Value(Y))))
975     return nullptr;
976 
977   // The inner op must match. Check for constants to avoid infinite loops.
978   Intrinsic::ID MinMaxID = II->getIntrinsicID();
979   auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
980   if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
981       match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
982     return nullptr;
983 
984   // max (max X, C), Y --> max (max X, Y), C
985   Function *MinMax =
986       Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
987   Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
988   NewInner->takeName(Inner);
989   return CallInst::Create(MinMax, {NewInner, C});
990 }
991 
992 /// Reduce a sequence of min/max intrinsics with a common operand.
993 static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
994   // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
995   auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
996   auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
997   Intrinsic::ID MinMaxID = II->getIntrinsicID();
998   if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
999       RHS->getIntrinsicID() != MinMaxID ||
1000       (!LHS->hasOneUse() && !RHS->hasOneUse()))
1001     return nullptr;
1002 
1003   Value *A = LHS->getArgOperand(0);
1004   Value *B = LHS->getArgOperand(1);
1005   Value *C = RHS->getArgOperand(0);
1006   Value *D = RHS->getArgOperand(1);
1007 
1008   // Look for a common operand.
1009   Value *MinMaxOp = nullptr;
1010   Value *ThirdOp = nullptr;
1011   if (LHS->hasOneUse()) {
1012     // If the LHS is only used in this chain and the RHS is used outside of it,
1013     // reuse the RHS min/max because that will eliminate the LHS.
1014     if (D == A || C == A) {
1015       // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1016       // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1017       MinMaxOp = RHS;
1018       ThirdOp = B;
1019     } else if (D == B || C == B) {
1020       // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1021       // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1022       MinMaxOp = RHS;
1023       ThirdOp = A;
1024     }
1025   } else {
1026     assert(RHS->hasOneUse() && "Expected one-use operand");
1027     // Reuse the LHS. This will eliminate the RHS.
1028     if (D == A || D == B) {
1029       // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1030       // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1031       MinMaxOp = LHS;
1032       ThirdOp = C;
1033     } else if (C == A || C == B) {
1034       // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1035       // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1036       MinMaxOp = LHS;
1037       ThirdOp = D;
1038     }
1039   }
1040 
1041   if (!MinMaxOp || !ThirdOp)
1042     return nullptr;
1043 
1044   Module *Mod = II->getModule();
1045   Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
1046   return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1047 }
1048 
1049 /// CallInst simplification. This mostly only handles folding of intrinsic
1050 /// instructions. For normal calls, it allows visitCallBase to do the heavy
1051 /// lifting.
1052 Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1053   // Don't try to simplify calls without uses. It will not do anything useful,
1054   // but will result in the following folds being skipped.
1055   if (!CI.use_empty())
1056     if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1057       return replaceInstUsesWith(CI, V);
1058 
1059   if (isFreeCall(&CI, &TLI))
1060     return visitFree(CI);
1061 
1062   // If the caller function (i.e. us, the function that contains this CallInst)
1063   // is nounwind, mark the call as nounwind, even if the callee isn't.
1064   if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1065     CI.setDoesNotThrow();
1066     return &CI;
1067   }
1068 
1069   IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1070   if (!II) return visitCallBase(CI);
1071 
1072   // For atomic unordered mem intrinsics if len is not a positive or
1073   // not a multiple of element size then behavior is undefined.
1074   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
1075     if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
1076       if (NumBytes->getSExtValue() < 0 ||
1077           (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
1078         CreateNonTerminatorUnreachable(AMI);
1079         assert(AMI->getType()->isVoidTy() &&
1080                "non void atomic unordered mem intrinsic");
1081         return eraseInstFromFunction(*AMI);
1082       }
1083 
1084   // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1085   // instead of in visitCallBase.
1086   if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1087     bool Changed = false;
1088 
1089     // memmove/cpy/set of zero bytes is a noop.
1090     if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1091       if (NumBytes->isNullValue())
1092         return eraseInstFromFunction(CI);
1093 
1094       if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1095         if (CI->getZExtValue() == 1) {
1096           // Replace the instruction with just byte operations.  We would
1097           // transform other cases to loads/stores, but we don't know if
1098           // alignment is sufficient.
1099         }
1100     }
1101 
1102     // No other transformations apply to volatile transfers.
1103     if (auto *M = dyn_cast<MemIntrinsic>(MI))
1104       if (M->isVolatile())
1105         return nullptr;
1106 
1107     // If we have a memmove and the source operation is a constant global,
1108     // then the source and dest pointers can't alias, so we can change this
1109     // into a call to memcpy.
1110     if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1111       if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1112         if (GVSrc->isConstant()) {
1113           Module *M = CI.getModule();
1114           Intrinsic::ID MemCpyID =
1115               isa<AtomicMemMoveInst>(MMI)
1116                   ? Intrinsic::memcpy_element_unordered_atomic
1117                   : Intrinsic::memcpy;
1118           Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1119                            CI.getArgOperand(1)->getType(),
1120                            CI.getArgOperand(2)->getType() };
1121           CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1122           Changed = true;
1123         }
1124     }
1125 
1126     if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1127       // memmove(x,x,size) -> noop.
1128       if (MTI->getSource() == MTI->getDest())
1129         return eraseInstFromFunction(CI);
1130     }
1131 
1132     // If we can determine a pointer alignment that is bigger than currently
1133     // set, update the alignment.
1134     if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1135       if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1136         return I;
1137     } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1138       if (Instruction *I = SimplifyAnyMemSet(MSI))
1139         return I;
1140     }
1141 
1142     if (Changed) return II;
1143   }
1144 
1145   // For fixed width vector result intrinsics, use the generic demanded vector
1146   // support.
1147   if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1148     auto VWidth = IIFVTy->getNumElements();
1149     APInt UndefElts(VWidth, 0);
1150     APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1151     if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
1152       if (V != II)
1153         return replaceInstUsesWith(*II, V);
1154       return II;
1155     }
1156   }
1157 
1158   if (II->isCommutative()) {
1159     if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1160       return NewCall;
1161   }
1162 
1163   Intrinsic::ID IID = II->getIntrinsicID();
1164   switch (IID) {
1165   case Intrinsic::objectsize:
1166     if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1167       return replaceInstUsesWith(CI, V);
1168     return nullptr;
1169   case Intrinsic::abs: {
1170     Value *IIOperand = II->getArgOperand(0);
1171     bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
1172 
1173     // abs(-x) -> abs(x)
1174     // TODO: Copy nsw if it was present on the neg?
1175     Value *X;
1176     if (match(IIOperand, m_Neg(m_Value(X))))
1177       return replaceOperand(*II, 0, X);
1178     if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
1179       return replaceOperand(*II, 0, X);
1180     if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
1181       return replaceOperand(*II, 0, X);
1182 
1183     if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
1184       // abs(x) -> x if x >= 0
1185       if (!*Sign)
1186         return replaceInstUsesWith(*II, IIOperand);
1187 
1188       // abs(x) -> -x if x < 0
1189       if (IntMinIsPoison)
1190         return BinaryOperator::CreateNSWNeg(IIOperand);
1191       return BinaryOperator::CreateNeg(IIOperand);
1192     }
1193 
1194     // abs (sext X) --> zext (abs X*)
1195     // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1196     if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
1197       Value *NarrowAbs =
1198           Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1199       return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
1200     }
1201 
1202     // Match a complicated way to check if a number is odd/even:
1203     // abs (srem X, 2) --> and X, 1
1204     const APInt *C;
1205     if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
1206       return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
1207 
1208     break;
1209   }
1210   case Intrinsic::umin: {
1211     Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1212     // umin(x, 1) == zext(x != 0)
1213     if (match(I1, m_One())) {
1214       Value *Zero = Constant::getNullValue(I0->getType());
1215       Value *Cmp = Builder.CreateICmpNE(I0, Zero);
1216       return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
1217     }
1218     LLVM_FALLTHROUGH;
1219   }
1220   case Intrinsic::umax: {
1221     Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1222     Value *X, *Y;
1223     if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
1224         (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1225       Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1226       return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1227     }
1228     Constant *C;
1229     if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1230         I0->hasOneUse()) {
1231       Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
1232       if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
1233         Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1234         return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1235       }
1236     }
1237     // If both operands of unsigned min/max are sign-extended, it is still ok
1238     // to narrow the operation.
1239     LLVM_FALLTHROUGH;
1240   }
1241   case Intrinsic::smax:
1242   case Intrinsic::smin: {
1243     Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1244     Value *X, *Y;
1245     if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
1246         (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1247       Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1248       return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
1249     }
1250 
1251     Constant *C;
1252     if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1253         I0->hasOneUse()) {
1254       Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
1255       if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
1256         Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1257         return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
1258       }
1259     }
1260 
1261     if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
1262       // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
1263       // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
1264       // TODO: Canonicalize neg after min/max if I1 is constant.
1265       if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
1266           (I0->hasOneUse() || I1->hasOneUse())) {
1267         Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
1268         Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
1269         return BinaryOperator::CreateNSWNeg(InvMaxMin);
1270       }
1271     }
1272 
1273     // If we can eliminate ~A and Y is free to invert:
1274     // max ~A, Y --> ~(min A, ~Y)
1275     //
1276     // Examples:
1277     // max ~A, ~Y --> ~(min A, Y)
1278     // max ~A, C --> ~(min A, ~C)
1279     // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
1280     auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
1281       Value *A;
1282       if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
1283           !isFreeToInvert(A, A->hasOneUse()) &&
1284           isFreeToInvert(Y, Y->hasOneUse())) {
1285         Value *NotY = Builder.CreateNot(Y);
1286         Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
1287         Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
1288         return BinaryOperator::CreateNot(InvMaxMin);
1289       }
1290       return nullptr;
1291     };
1292 
1293     if (Instruction *I = moveNotAfterMinMax(I0, I1))
1294       return I;
1295     if (Instruction *I = moveNotAfterMinMax(I1, I0))
1296       return I;
1297 
1298     if (Instruction *I = moveAddAfterMinMax(II, Builder))
1299       return I;
1300 
1301     // smax(X, -X) --> abs(X)
1302     // smin(X, -X) --> -abs(X)
1303     // umax(X, -X) --> -abs(X)
1304     // umin(X, -X) --> abs(X)
1305     if (isKnownNegation(I0, I1)) {
1306       // We can choose either operand as the input to abs(), but if we can
1307       // eliminate the only use of a value, that's better for subsequent
1308       // transforms/analysis.
1309       if (I0->hasOneUse() && !I1->hasOneUse())
1310         std::swap(I0, I1);
1311 
1312       // This is some variant of abs(). See if we can propagate 'nsw' to the abs
1313       // operation and potentially its negation.
1314       bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
1315       Value *Abs = Builder.CreateBinaryIntrinsic(
1316           Intrinsic::abs, I0,
1317           ConstantInt::getBool(II->getContext(), IntMinIsPoison));
1318 
1319       // We don't have a "nabs" intrinsic, so negate if needed based on the
1320       // max/min operation.
1321       if (IID == Intrinsic::smin || IID == Intrinsic::umax)
1322         Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
1323       return replaceInstUsesWith(CI, Abs);
1324     }
1325 
1326     if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
1327       return Sel;
1328 
1329     if (Instruction *SAdd = matchSAddSubSat(*II))
1330       return SAdd;
1331 
1332     if (match(I1, m_ImmConstant()))
1333       if (auto *Sel = dyn_cast<SelectInst>(I0))
1334         if (Instruction *R = FoldOpIntoSelect(*II, Sel))
1335           return R;
1336 
1337     if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
1338       return NewMinMax;
1339 
1340     if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
1341       return R;
1342 
1343     if (Instruction *NewMinMax = factorizeMinMaxTree(II))
1344        return NewMinMax;
1345 
1346     break;
1347   }
1348   case Intrinsic::bswap: {
1349     Value *IIOperand = II->getArgOperand(0);
1350     Value *X = nullptr;
1351 
1352     KnownBits Known = computeKnownBits(IIOperand, 0, II);
1353     uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
1354     uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
1355     unsigned BW = Known.getBitWidth();
1356 
1357     // bswap(x) -> shift(x) if x has exactly one "active byte"
1358     if (BW - LZ - TZ == 8) {
1359       assert(LZ != TZ && "active byte cannot be in the middle");
1360       if (LZ > TZ)  // -> shl(x) if the "active byte" is in the low part of x
1361         return BinaryOperator::CreateNUWShl(
1362             IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
1363       // -> lshr(x) if the "active byte" is in the high part of x
1364       return BinaryOperator::CreateExactLShr(
1365             IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
1366     }
1367 
1368     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1369     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1370       unsigned C = X->getType()->getScalarSizeInBits() - BW;
1371       Value *CV = ConstantInt::get(X->getType(), C);
1372       Value *V = Builder.CreateLShr(X, CV);
1373       return new TruncInst(V, IIOperand->getType());
1374     }
1375     break;
1376   }
1377   case Intrinsic::masked_load:
1378     if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
1379       return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1380     break;
1381   case Intrinsic::masked_store:
1382     return simplifyMaskedStore(*II);
1383   case Intrinsic::masked_gather:
1384     return simplifyMaskedGather(*II);
1385   case Intrinsic::masked_scatter:
1386     return simplifyMaskedScatter(*II);
1387   case Intrinsic::launder_invariant_group:
1388   case Intrinsic::strip_invariant_group:
1389     if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
1390       return replaceInstUsesWith(*II, SkippedBarrier);
1391     break;
1392   case Intrinsic::powi:
1393     if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1394       // 0 and 1 are handled in instsimplify
1395       // powi(x, -1) -> 1/x
1396       if (Power->isMinusOne())
1397         return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
1398                                              II->getArgOperand(0), II);
1399       // powi(x, 2) -> x*x
1400       if (Power->equalsInt(2))
1401         return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
1402                                              II->getArgOperand(0), II);
1403 
1404       if (!Power->getValue()[0]) {
1405         Value *X;
1406         // If power is even:
1407         // powi(-x, p) -> powi(x, p)
1408         // powi(fabs(x), p) -> powi(x, p)
1409         // powi(copysign(x, y), p) -> powi(x, p)
1410         if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
1411             match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
1412             match(II->getArgOperand(0),
1413                   m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
1414           return replaceOperand(*II, 0, X);
1415       }
1416     }
1417     break;
1418 
1419   case Intrinsic::cttz:
1420   case Intrinsic::ctlz:
1421     if (auto *I = foldCttzCtlz(*II, *this))
1422       return I;
1423     break;
1424 
1425   case Intrinsic::ctpop:
1426     if (auto *I = foldCtpop(*II, *this))
1427       return I;
1428     break;
1429 
1430   case Intrinsic::fshl:
1431   case Intrinsic::fshr: {
1432     Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1433     Type *Ty = II->getType();
1434     unsigned BitWidth = Ty->getScalarSizeInBits();
1435     Constant *ShAmtC;
1436     if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) &&
1437         !ShAmtC->containsConstantExpression()) {
1438       // Canonicalize a shift amount constant operand to modulo the bit-width.
1439       Constant *WidthC = ConstantInt::get(Ty, BitWidth);
1440       Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
1441       if (ModuloC != ShAmtC)
1442         return replaceOperand(*II, 2, ModuloC);
1443 
1444       assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
1445                  ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
1446              "Shift amount expected to be modulo bitwidth");
1447 
1448       // Canonicalize funnel shift right by constant to funnel shift left. This
1449       // is not entirely arbitrary. For historical reasons, the backend may
1450       // recognize rotate left patterns but miss rotate right patterns.
1451       if (IID == Intrinsic::fshr) {
1452         // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
1453         Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
1454         Module *Mod = II->getModule();
1455         Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
1456         return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
1457       }
1458       assert(IID == Intrinsic::fshl &&
1459              "All funnel shifts by simple constants should go left");
1460 
1461       // fshl(X, 0, C) --> shl X, C
1462       // fshl(X, undef, C) --> shl X, C
1463       if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
1464         return BinaryOperator::CreateShl(Op0, ShAmtC);
1465 
1466       // fshl(0, X, C) --> lshr X, (BW-C)
1467       // fshl(undef, X, C) --> lshr X, (BW-C)
1468       if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
1469         return BinaryOperator::CreateLShr(Op1,
1470                                           ConstantExpr::getSub(WidthC, ShAmtC));
1471 
1472       // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
1473       if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
1474         Module *Mod = II->getModule();
1475         Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
1476         return CallInst::Create(Bswap, { Op0 });
1477       }
1478     }
1479 
1480     // Left or right might be masked.
1481     if (SimplifyDemandedInstructionBits(*II))
1482       return &CI;
1483 
1484     // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
1485     // so only the low bits of the shift amount are demanded if the bitwidth is
1486     // a power-of-2.
1487     if (!isPowerOf2_32(BitWidth))
1488       break;
1489     APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
1490     KnownBits Op2Known(BitWidth);
1491     if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
1492       return &CI;
1493     break;
1494   }
1495   case Intrinsic::uadd_with_overflow:
1496   case Intrinsic::sadd_with_overflow: {
1497     if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1498       return I;
1499 
1500     // Given 2 constant operands whose sum does not overflow:
1501     // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
1502     // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
1503     Value *X;
1504     const APInt *C0, *C1;
1505     Value *Arg0 = II->getArgOperand(0);
1506     Value *Arg1 = II->getArgOperand(1);
1507     bool IsSigned = IID == Intrinsic::sadd_with_overflow;
1508     bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
1509                              : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
1510     if (HasNWAdd && match(Arg1, m_APInt(C1))) {
1511       bool Overflow;
1512       APInt NewC =
1513           IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
1514       if (!Overflow)
1515         return replaceInstUsesWith(
1516             *II, Builder.CreateBinaryIntrinsic(
1517                      IID, X, ConstantInt::get(Arg1->getType(), NewC)));
1518     }
1519     break;
1520   }
1521 
1522   case Intrinsic::umul_with_overflow:
1523   case Intrinsic::smul_with_overflow:
1524   case Intrinsic::usub_with_overflow:
1525     if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1526       return I;
1527     break;
1528 
1529   case Intrinsic::ssub_with_overflow: {
1530     if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1531       return I;
1532 
1533     Constant *C;
1534     Value *Arg0 = II->getArgOperand(0);
1535     Value *Arg1 = II->getArgOperand(1);
1536     // Given a constant C that is not the minimum signed value
1537     // for an integer of a given bit width:
1538     //
1539     // ssubo X, C -> saddo X, -C
1540     if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
1541       Value *NegVal = ConstantExpr::getNeg(C);
1542       // Build a saddo call that is equivalent to the discovered
1543       // ssubo call.
1544       return replaceInstUsesWith(
1545           *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
1546                                              Arg0, NegVal));
1547     }
1548 
1549     break;
1550   }
1551 
1552   case Intrinsic::uadd_sat:
1553   case Intrinsic::sadd_sat:
1554   case Intrinsic::usub_sat:
1555   case Intrinsic::ssub_sat: {
1556     SaturatingInst *SI = cast<SaturatingInst>(II);
1557     Type *Ty = SI->getType();
1558     Value *Arg0 = SI->getLHS();
1559     Value *Arg1 = SI->getRHS();
1560 
1561     // Make use of known overflow information.
1562     OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
1563                                         Arg0, Arg1, SI);
1564     switch (OR) {
1565       case OverflowResult::MayOverflow:
1566         break;
1567       case OverflowResult::NeverOverflows:
1568         if (SI->isSigned())
1569           return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
1570         else
1571           return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
1572       case OverflowResult::AlwaysOverflowsLow: {
1573         unsigned BitWidth = Ty->getScalarSizeInBits();
1574         APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
1575         return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
1576       }
1577       case OverflowResult::AlwaysOverflowsHigh: {
1578         unsigned BitWidth = Ty->getScalarSizeInBits();
1579         APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
1580         return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
1581       }
1582     }
1583 
1584     // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
1585     Constant *C;
1586     if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
1587         C->isNotMinSignedValue()) {
1588       Value *NegVal = ConstantExpr::getNeg(C);
1589       return replaceInstUsesWith(
1590           *II, Builder.CreateBinaryIntrinsic(
1591               Intrinsic::sadd_sat, Arg0, NegVal));
1592     }
1593 
1594     // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
1595     // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
1596     // if Val and Val2 have the same sign
1597     if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
1598       Value *X;
1599       const APInt *Val, *Val2;
1600       APInt NewVal;
1601       bool IsUnsigned =
1602           IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
1603       if (Other->getIntrinsicID() == IID &&
1604           match(Arg1, m_APInt(Val)) &&
1605           match(Other->getArgOperand(0), m_Value(X)) &&
1606           match(Other->getArgOperand(1), m_APInt(Val2))) {
1607         if (IsUnsigned)
1608           NewVal = Val->uadd_sat(*Val2);
1609         else if (Val->isNonNegative() == Val2->isNonNegative()) {
1610           bool Overflow;
1611           NewVal = Val->sadd_ov(*Val2, Overflow);
1612           if (Overflow) {
1613             // Both adds together may add more than SignedMaxValue
1614             // without saturating the final result.
1615             break;
1616           }
1617         } else {
1618           // Cannot fold saturated addition with different signs.
1619           break;
1620         }
1621 
1622         return replaceInstUsesWith(
1623             *II, Builder.CreateBinaryIntrinsic(
1624                      IID, X, ConstantInt::get(II->getType(), NewVal)));
1625       }
1626     }
1627     break;
1628   }
1629 
1630   case Intrinsic::minnum:
1631   case Intrinsic::maxnum:
1632   case Intrinsic::minimum:
1633   case Intrinsic::maximum: {
1634     Value *Arg0 = II->getArgOperand(0);
1635     Value *Arg1 = II->getArgOperand(1);
1636     Value *X, *Y;
1637     if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
1638         (Arg0->hasOneUse() || Arg1->hasOneUse())) {
1639       // If both operands are negated, invert the call and negate the result:
1640       // min(-X, -Y) --> -(max(X, Y))
1641       // max(-X, -Y) --> -(min(X, Y))
1642       Intrinsic::ID NewIID;
1643       switch (IID) {
1644       case Intrinsic::maxnum:
1645         NewIID = Intrinsic::minnum;
1646         break;
1647       case Intrinsic::minnum:
1648         NewIID = Intrinsic::maxnum;
1649         break;
1650       case Intrinsic::maximum:
1651         NewIID = Intrinsic::minimum;
1652         break;
1653       case Intrinsic::minimum:
1654         NewIID = Intrinsic::maximum;
1655         break;
1656       default:
1657         llvm_unreachable("unexpected intrinsic ID");
1658       }
1659       Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
1660       Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
1661       FNeg->copyIRFlags(II);
1662       return FNeg;
1663     }
1664 
1665     // m(m(X, C2), C1) -> m(X, C)
1666     const APFloat *C1, *C2;
1667     if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
1668       if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
1669           ((match(M->getArgOperand(0), m_Value(X)) &&
1670             match(M->getArgOperand(1), m_APFloat(C2))) ||
1671            (match(M->getArgOperand(1), m_Value(X)) &&
1672             match(M->getArgOperand(0), m_APFloat(C2))))) {
1673         APFloat Res(0.0);
1674         switch (IID) {
1675         case Intrinsic::maxnum:
1676           Res = maxnum(*C1, *C2);
1677           break;
1678         case Intrinsic::minnum:
1679           Res = minnum(*C1, *C2);
1680           break;
1681         case Intrinsic::maximum:
1682           Res = maximum(*C1, *C2);
1683           break;
1684         case Intrinsic::minimum:
1685           Res = minimum(*C1, *C2);
1686           break;
1687         default:
1688           llvm_unreachable("unexpected intrinsic ID");
1689         }
1690         Instruction *NewCall = Builder.CreateBinaryIntrinsic(
1691             IID, X, ConstantFP::get(Arg0->getType(), Res), II);
1692         // TODO: Conservatively intersecting FMF. If Res == C2, the transform
1693         //       was a simplification (so Arg0 and its original flags could
1694         //       propagate?)
1695         NewCall->andIRFlags(M);
1696         return replaceInstUsesWith(*II, NewCall);
1697       }
1698     }
1699 
1700     // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
1701     if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
1702         match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
1703         X->getType() == Y->getType()) {
1704       Value *NewCall =
1705           Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
1706       return new FPExtInst(NewCall, II->getType());
1707     }
1708 
1709     // max X, -X --> fabs X
1710     // min X, -X --> -(fabs X)
1711     // TODO: Remove one-use limitation? That is obviously better for max.
1712     //       It would be an extra instruction for min (fnabs), but that is
1713     //       still likely better for analysis and codegen.
1714     if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
1715         (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
1716       Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
1717       if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
1718         R = Builder.CreateFNegFMF(R, II);
1719       return replaceInstUsesWith(*II, R);
1720     }
1721 
1722     break;
1723   }
1724   case Intrinsic::fmuladd: {
1725     // Canonicalize fast fmuladd to the separate fmul + fadd.
1726     if (II->isFast()) {
1727       BuilderTy::FastMathFlagGuard Guard(Builder);
1728       Builder.setFastMathFlags(II->getFastMathFlags());
1729       Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
1730                                       II->getArgOperand(1));
1731       Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
1732       Add->takeName(II);
1733       return replaceInstUsesWith(*II, Add);
1734     }
1735 
1736     // Try to simplify the underlying FMul.
1737     if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
1738                                     II->getFastMathFlags(),
1739                                     SQ.getWithInstruction(II))) {
1740       auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
1741       FAdd->copyFastMathFlags(II);
1742       return FAdd;
1743     }
1744 
1745     LLVM_FALLTHROUGH;
1746   }
1747   case Intrinsic::fma: {
1748     // fma fneg(x), fneg(y), z -> fma x, y, z
1749     Value *Src0 = II->getArgOperand(0);
1750     Value *Src1 = II->getArgOperand(1);
1751     Value *X, *Y;
1752     if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
1753       replaceOperand(*II, 0, X);
1754       replaceOperand(*II, 1, Y);
1755       return II;
1756     }
1757 
1758     // fma fabs(x), fabs(x), z -> fma x, x, z
1759     if (match(Src0, m_FAbs(m_Value(X))) &&
1760         match(Src1, m_FAbs(m_Specific(X)))) {
1761       replaceOperand(*II, 0, X);
1762       replaceOperand(*II, 1, X);
1763       return II;
1764     }
1765 
1766     // Try to simplify the underlying FMul. We can only apply simplifications
1767     // that do not require rounding.
1768     if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
1769                                    II->getFastMathFlags(),
1770                                    SQ.getWithInstruction(II))) {
1771       auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
1772       FAdd->copyFastMathFlags(II);
1773       return FAdd;
1774     }
1775 
1776     // fma x, y, 0 -> fmul x, y
1777     // This is always valid for -0.0, but requires nsz for +0.0 as
1778     // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
1779     if (match(II->getArgOperand(2), m_NegZeroFP()) ||
1780         (match(II->getArgOperand(2), m_PosZeroFP()) &&
1781          II->getFastMathFlags().noSignedZeros()))
1782       return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
1783 
1784     break;
1785   }
1786   case Intrinsic::copysign: {
1787     Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
1788     if (SignBitMustBeZero(Sign, &TLI)) {
1789       // If we know that the sign argument is positive, reduce to FABS:
1790       // copysign Mag, +Sign --> fabs Mag
1791       Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
1792       return replaceInstUsesWith(*II, Fabs);
1793     }
1794     // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
1795     const APFloat *C;
1796     if (match(Sign, m_APFloat(C)) && C->isNegative()) {
1797       // If we know that the sign argument is negative, reduce to FNABS:
1798       // copysign Mag, -Sign --> fneg (fabs Mag)
1799       Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
1800       return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
1801     }
1802 
1803     // Propagate sign argument through nested calls:
1804     // copysign Mag, (copysign ?, X) --> copysign Mag, X
1805     Value *X;
1806     if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
1807       return replaceOperand(*II, 1, X);
1808 
1809     // Peek through changes of magnitude's sign-bit. This call rewrites those:
1810     // copysign (fabs X), Sign --> copysign X, Sign
1811     // copysign (fneg X), Sign --> copysign X, Sign
1812     if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
1813       return replaceOperand(*II, 0, X);
1814 
1815     break;
1816   }
1817   case Intrinsic::fabs: {
1818     Value *Cond, *TVal, *FVal;
1819     if (match(II->getArgOperand(0),
1820               m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
1821       // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
1822       if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
1823         CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
1824         CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
1825         return SelectInst::Create(Cond, AbsT, AbsF);
1826       }
1827       // fabs (select Cond, -FVal, FVal) --> fabs FVal
1828       if (match(TVal, m_FNeg(m_Specific(FVal))))
1829         return replaceOperand(*II, 0, FVal);
1830       // fabs (select Cond, TVal, -TVal) --> fabs TVal
1831       if (match(FVal, m_FNeg(m_Specific(TVal))))
1832         return replaceOperand(*II, 0, TVal);
1833     }
1834 
1835     LLVM_FALLTHROUGH;
1836   }
1837   case Intrinsic::ceil:
1838   case Intrinsic::floor:
1839   case Intrinsic::round:
1840   case Intrinsic::roundeven:
1841   case Intrinsic::nearbyint:
1842   case Intrinsic::rint:
1843   case Intrinsic::trunc: {
1844     Value *ExtSrc;
1845     if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
1846       // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
1847       Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
1848       return new FPExtInst(NarrowII, II->getType());
1849     }
1850     break;
1851   }
1852   case Intrinsic::cos:
1853   case Intrinsic::amdgcn_cos: {
1854     Value *X;
1855     Value *Src = II->getArgOperand(0);
1856     if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
1857       // cos(-x) -> cos(x)
1858       // cos(fabs(x)) -> cos(x)
1859       return replaceOperand(*II, 0, X);
1860     }
1861     break;
1862   }
1863   case Intrinsic::sin: {
1864     Value *X;
1865     if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
1866       // sin(-x) --> -sin(x)
1867       Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
1868       Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
1869       FNeg->copyFastMathFlags(II);
1870       return FNeg;
1871     }
1872     break;
1873   }
1874 
1875   case Intrinsic::arm_neon_vtbl1:
1876   case Intrinsic::aarch64_neon_tbl1:
1877     if (Value *V = simplifyNeonTbl1(*II, Builder))
1878       return replaceInstUsesWith(*II, V);
1879     break;
1880 
1881   case Intrinsic::arm_neon_vmulls:
1882   case Intrinsic::arm_neon_vmullu:
1883   case Intrinsic::aarch64_neon_smull:
1884   case Intrinsic::aarch64_neon_umull: {
1885     Value *Arg0 = II->getArgOperand(0);
1886     Value *Arg1 = II->getArgOperand(1);
1887 
1888     // Handle mul by zero first:
1889     if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
1890       return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
1891     }
1892 
1893     // Check for constant LHS & RHS - in this case we just simplify.
1894     bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
1895                  IID == Intrinsic::aarch64_neon_umull);
1896     VectorType *NewVT = cast<VectorType>(II->getType());
1897     if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
1898       if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
1899         CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
1900         CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
1901 
1902         return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
1903       }
1904 
1905       // Couldn't simplify - canonicalize constant to the RHS.
1906       std::swap(Arg0, Arg1);
1907     }
1908 
1909     // Handle mul by one:
1910     if (Constant *CV1 = dyn_cast<Constant>(Arg1))
1911       if (ConstantInt *Splat =
1912               dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
1913         if (Splat->isOne())
1914           return CastInst::CreateIntegerCast(Arg0, II->getType(),
1915                                              /*isSigned=*/!Zext);
1916 
1917     break;
1918   }
1919   case Intrinsic::arm_neon_aesd:
1920   case Intrinsic::arm_neon_aese:
1921   case Intrinsic::aarch64_crypto_aesd:
1922   case Intrinsic::aarch64_crypto_aese: {
1923     Value *DataArg = II->getArgOperand(0);
1924     Value *KeyArg  = II->getArgOperand(1);
1925 
1926     // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
1927     Value *Data, *Key;
1928     if (match(KeyArg, m_ZeroInt()) &&
1929         match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
1930       replaceOperand(*II, 0, Data);
1931       replaceOperand(*II, 1, Key);
1932       return II;
1933     }
1934     break;
1935   }
1936   case Intrinsic::hexagon_V6_vandvrt:
1937   case Intrinsic::hexagon_V6_vandvrt_128B: {
1938     // Simplify Q -> V -> Q conversion.
1939     if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
1940       Intrinsic::ID ID0 = Op0->getIntrinsicID();
1941       if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
1942           ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
1943         break;
1944       Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
1945       uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
1946       uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
1947       // Check if every byte has common bits in Bytes and Mask.
1948       uint64_t C = Bytes1 & Mask1;
1949       if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
1950         return replaceInstUsesWith(*II, Op0->getArgOperand(0));
1951     }
1952     break;
1953   }
1954   case Intrinsic::stackrestore: {
1955     enum class ClassifyResult {
1956       None,
1957       Alloca,
1958       StackRestore,
1959       CallWithSideEffects,
1960     };
1961     auto Classify = [](const Instruction *I) {
1962       if (isa<AllocaInst>(I))
1963         return ClassifyResult::Alloca;
1964 
1965       if (auto *CI = dyn_cast<CallInst>(I)) {
1966         if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
1967           if (II->getIntrinsicID() == Intrinsic::stackrestore)
1968             return ClassifyResult::StackRestore;
1969 
1970           if (II->mayHaveSideEffects())
1971             return ClassifyResult::CallWithSideEffects;
1972         } else {
1973           // Consider all non-intrinsic calls to be side effects
1974           return ClassifyResult::CallWithSideEffects;
1975         }
1976       }
1977 
1978       return ClassifyResult::None;
1979     };
1980 
1981     // If the stacksave and the stackrestore are in the same BB, and there is
1982     // no intervening call, alloca, or stackrestore of a different stacksave,
1983     // remove the restore. This can happen when variable allocas are DCE'd.
1984     if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
1985       if (SS->getIntrinsicID() == Intrinsic::stacksave &&
1986           SS->getParent() == II->getParent()) {
1987         BasicBlock::iterator BI(SS);
1988         bool CannotRemove = false;
1989         for (++BI; &*BI != II; ++BI) {
1990           switch (Classify(&*BI)) {
1991           case ClassifyResult::None:
1992             // So far so good, look at next instructions.
1993             break;
1994 
1995           case ClassifyResult::StackRestore:
1996             // If we found an intervening stackrestore for a different
1997             // stacksave, we can't remove the stackrestore. Otherwise, continue.
1998             if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
1999               CannotRemove = true;
2000             break;
2001 
2002           case ClassifyResult::Alloca:
2003           case ClassifyResult::CallWithSideEffects:
2004             // If we found an alloca, a non-intrinsic call, or an intrinsic
2005             // call with side effects, we can't remove the stackrestore.
2006             CannotRemove = true;
2007             break;
2008           }
2009           if (CannotRemove)
2010             break;
2011         }
2012 
2013         if (!CannotRemove)
2014           return eraseInstFromFunction(CI);
2015       }
2016     }
2017 
2018     // Scan down this block to see if there is another stack restore in the
2019     // same block without an intervening call/alloca.
2020     BasicBlock::iterator BI(II);
2021     Instruction *TI = II->getParent()->getTerminator();
2022     bool CannotRemove = false;
2023     for (++BI; &*BI != TI; ++BI) {
2024       switch (Classify(&*BI)) {
2025       case ClassifyResult::None:
2026         // So far so good, look at next instructions.
2027         break;
2028 
2029       case ClassifyResult::StackRestore:
2030         // If there is a stackrestore below this one, remove this one.
2031         return eraseInstFromFunction(CI);
2032 
2033       case ClassifyResult::Alloca:
2034       case ClassifyResult::CallWithSideEffects:
2035         // If we found an alloca, a non-intrinsic call, or an intrinsic call
2036         // with side effects (such as llvm.stacksave and llvm.read_register),
2037         // we can't remove the stack restore.
2038         CannotRemove = true;
2039         break;
2040       }
2041       if (CannotRemove)
2042         break;
2043     }
2044 
2045     // If the stack restore is in a return, resume, or unwind block and if there
2046     // are no allocas or calls between the restore and the return, nuke the
2047     // restore.
2048     if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
2049       return eraseInstFromFunction(CI);
2050     break;
2051   }
2052   case Intrinsic::lifetime_end:
2053     // Asan needs to poison memory to detect invalid access which is possible
2054     // even for empty lifetime range.
2055     if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2056         II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
2057         II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2058       break;
2059 
2060     if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
2061           return I.getIntrinsicID() == Intrinsic::lifetime_start;
2062         }))
2063       return nullptr;
2064     break;
2065   case Intrinsic::assume: {
2066     Value *IIOperand = II->getArgOperand(0);
2067     SmallVector<OperandBundleDef, 4> OpBundles;
2068     II->getOperandBundlesAsDefs(OpBundles);
2069 
2070     /// This will remove the boolean Condition from the assume given as
2071     /// argument and remove the assume if it becomes useless.
2072     /// always returns nullptr for use as a return values.
2073     auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
2074       assert(isa<AssumeInst>(Assume));
2075       if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
2076         return eraseInstFromFunction(CI);
2077       replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
2078       return nullptr;
2079     };
2080     // Remove an assume if it is followed by an identical assume.
2081     // TODO: Do we need this? Unless there are conflicting assumptions, the
2082     // computeKnownBits(IIOperand) below here eliminates redundant assumes.
2083     Instruction *Next = II->getNextNonDebugInstruction();
2084     if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
2085       return RemoveConditionFromAssume(Next);
2086 
2087     // Canonicalize assume(a && b) -> assume(a); assume(b);
2088     // Note: New assumption intrinsics created here are registered by
2089     // the InstCombineIRInserter object.
2090     FunctionType *AssumeIntrinsicTy = II->getFunctionType();
2091     Value *AssumeIntrinsic = II->getCalledOperand();
2092     Value *A, *B;
2093     if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
2094       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
2095                          II->getName());
2096       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
2097       return eraseInstFromFunction(*II);
2098     }
2099     // assume(!(a || b)) -> assume(!a); assume(!b);
2100     if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
2101       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
2102                          Builder.CreateNot(A), OpBundles, II->getName());
2103       Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
2104                          Builder.CreateNot(B), II->getName());
2105       return eraseInstFromFunction(*II);
2106     }
2107 
2108     // assume( (load addr) != null ) -> add 'nonnull' metadata to load
2109     // (if assume is valid at the load)
2110     CmpInst::Predicate Pred;
2111     Instruction *LHS;
2112     if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
2113         Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
2114         LHS->getType()->isPointerTy() &&
2115         isValidAssumeForContext(II, LHS, &DT)) {
2116       MDNode *MD = MDNode::get(II->getContext(), None);
2117       LHS->setMetadata(LLVMContext::MD_nonnull, MD);
2118       return RemoveConditionFromAssume(II);
2119 
2120       // TODO: apply nonnull return attributes to calls and invokes
2121       // TODO: apply range metadata for range check patterns?
2122     }
2123 
2124     // Convert nonnull assume like:
2125     // %A = icmp ne i32* %PTR, null
2126     // call void @llvm.assume(i1 %A)
2127     // into
2128     // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
2129     if (EnableKnowledgeRetention &&
2130         match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
2131         Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
2132       if (auto *Replacement = buildAssumeFromKnowledge(
2133               {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
2134 
2135         Replacement->insertBefore(Next);
2136         AC.registerAssumption(Replacement);
2137         return RemoveConditionFromAssume(II);
2138       }
2139     }
2140 
2141     // Convert alignment assume like:
2142     // %B = ptrtoint i32* %A to i64
2143     // %C = and i64 %B, Constant
2144     // %D = icmp eq i64 %C, 0
2145     // call void @llvm.assume(i1 %D)
2146     // into
2147     // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64  Constant + 1)]
2148     uint64_t AlignMask;
2149     if (EnableKnowledgeRetention &&
2150         match(IIOperand,
2151               m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
2152                     m_Zero())) &&
2153         Pred == CmpInst::ICMP_EQ) {
2154       if (isPowerOf2_64(AlignMask + 1)) {
2155         uint64_t Offset = 0;
2156         match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
2157         if (match(A, m_PtrToInt(m_Value(A)))) {
2158           /// Note: this doesn't preserve the offset information but merges
2159           /// offset and alignment.
2160           /// TODO: we can generate a GEP instead of merging the alignment with
2161           /// the offset.
2162           RetainedKnowledge RK{Attribute::Alignment,
2163                                (unsigned)MinAlign(Offset, AlignMask + 1), A};
2164           if (auto *Replacement =
2165                   buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
2166 
2167             Replacement->insertAfter(II);
2168             AC.registerAssumption(Replacement);
2169           }
2170           return RemoveConditionFromAssume(II);
2171         }
2172       }
2173     }
2174 
2175     /// Canonicalize Knowledge in operand bundles.
2176     if (EnableKnowledgeRetention && II->hasOperandBundles()) {
2177       for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
2178         auto &BOI = II->bundle_op_info_begin()[Idx];
2179         RetainedKnowledge RK =
2180           llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
2181         if (BOI.End - BOI.Begin > 2)
2182           continue; // Prevent reducing knowledge in an align with offset since
2183                     // extracting a RetainedKnowledge form them looses offset
2184                     // information
2185         RetainedKnowledge CanonRK =
2186           llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
2187                                           &getAssumptionCache(),
2188                                           &getDominatorTree());
2189         if (CanonRK == RK)
2190           continue;
2191         if (!CanonRK) {
2192           if (BOI.End - BOI.Begin > 0) {
2193             Worklist.pushValue(II->op_begin()[BOI.Begin]);
2194             Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
2195           }
2196           continue;
2197         }
2198         assert(RK.AttrKind == CanonRK.AttrKind);
2199         if (BOI.End - BOI.Begin > 0)
2200           II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
2201         if (BOI.End - BOI.Begin > 1)
2202           II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
2203               Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
2204         if (RK.WasOn)
2205           Worklist.pushValue(RK.WasOn);
2206         return II;
2207       }
2208     }
2209 
2210     // If there is a dominating assume with the same condition as this one,
2211     // then this one is redundant, and should be removed.
2212     KnownBits Known(1);
2213     computeKnownBits(IIOperand, Known, 0, II);
2214     if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
2215       return eraseInstFromFunction(*II);
2216 
2217     // Update the cache of affected values for this assumption (we might be
2218     // here because we just simplified the condition).
2219     AC.updateAffectedValues(cast<AssumeInst>(II));
2220     break;
2221   }
2222   case Intrinsic::experimental_guard: {
2223     // Is this guard followed by another guard?  We scan forward over a small
2224     // fixed window of instructions to handle common cases with conditions
2225     // computed between guards.
2226     Instruction *NextInst = II->getNextNonDebugInstruction();
2227     for (unsigned i = 0; i < GuardWideningWindow; i++) {
2228       // Note: Using context-free form to avoid compile time blow up
2229       if (!isSafeToSpeculativelyExecute(NextInst))
2230         break;
2231       NextInst = NextInst->getNextNonDebugInstruction();
2232     }
2233     Value *NextCond = nullptr;
2234     if (match(NextInst,
2235               m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
2236       Value *CurrCond = II->getArgOperand(0);
2237 
2238       // Remove a guard that it is immediately preceded by an identical guard.
2239       // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
2240       if (CurrCond != NextCond) {
2241         Instruction *MoveI = II->getNextNonDebugInstruction();
2242         while (MoveI != NextInst) {
2243           auto *Temp = MoveI;
2244           MoveI = MoveI->getNextNonDebugInstruction();
2245           Temp->moveBefore(II);
2246         }
2247         replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
2248       }
2249       eraseInstFromFunction(*NextInst);
2250       return II;
2251     }
2252     break;
2253   }
2254   case Intrinsic::experimental_vector_insert: {
2255     Value *Vec = II->getArgOperand(0);
2256     Value *SubVec = II->getArgOperand(1);
2257     Value *Idx = II->getArgOperand(2);
2258     auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
2259     auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
2260     auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
2261 
2262     // Only canonicalize if the destination vector, Vec, and SubVec are all
2263     // fixed vectors.
2264     if (DstTy && VecTy && SubVecTy) {
2265       unsigned DstNumElts = DstTy->getNumElements();
2266       unsigned VecNumElts = VecTy->getNumElements();
2267       unsigned SubVecNumElts = SubVecTy->getNumElements();
2268       unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
2269 
2270       // An insert that entirely overwrites Vec with SubVec is a nop.
2271       if (VecNumElts == SubVecNumElts)
2272         return replaceInstUsesWith(CI, SubVec);
2273 
2274       // Widen SubVec into a vector of the same width as Vec, since
2275       // shufflevector requires the two input vectors to be the same width.
2276       // Elements beyond the bounds of SubVec within the widened vector are
2277       // undefined.
2278       SmallVector<int, 8> WidenMask;
2279       unsigned i;
2280       for (i = 0; i != SubVecNumElts; ++i)
2281         WidenMask.push_back(i);
2282       for (; i != VecNumElts; ++i)
2283         WidenMask.push_back(UndefMaskElem);
2284 
2285       Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
2286 
2287       SmallVector<int, 8> Mask;
2288       for (unsigned i = 0; i != IdxN; ++i)
2289         Mask.push_back(i);
2290       for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
2291         Mask.push_back(i);
2292       for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
2293         Mask.push_back(i);
2294 
2295       Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
2296       return replaceInstUsesWith(CI, Shuffle);
2297     }
2298     break;
2299   }
2300   case Intrinsic::experimental_vector_extract: {
2301     Value *Vec = II->getArgOperand(0);
2302     Value *Idx = II->getArgOperand(1);
2303 
2304     auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
2305     auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
2306 
2307     // Only canonicalize if the the destination vector and Vec are fixed
2308     // vectors.
2309     if (DstTy && VecTy) {
2310       unsigned DstNumElts = DstTy->getNumElements();
2311       unsigned VecNumElts = VecTy->getNumElements();
2312       unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
2313 
2314       // Extracting the entirety of Vec is a nop.
2315       if (VecNumElts == DstNumElts) {
2316         replaceInstUsesWith(CI, Vec);
2317         return eraseInstFromFunction(CI);
2318       }
2319 
2320       SmallVector<int, 8> Mask;
2321       for (unsigned i = 0; i != DstNumElts; ++i)
2322         Mask.push_back(IdxN + i);
2323 
2324       Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
2325       return replaceInstUsesWith(CI, Shuffle);
2326     }
2327     break;
2328   }
2329   case Intrinsic::experimental_vector_reverse: {
2330     Value *BO0, *BO1, *X, *Y;
2331     Value *Vec = II->getArgOperand(0);
2332     if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
2333       auto *OldBinOp = cast<BinaryOperator>(Vec);
2334       if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2335                          m_Value(X)))) {
2336         // rev(binop rev(X), rev(Y)) --> binop X, Y
2337         if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2338                            m_Value(Y))))
2339           return replaceInstUsesWith(CI,
2340                                      BinaryOperator::CreateWithCopiedFlags(
2341                                          OldBinOp->getOpcode(), X, Y, OldBinOp,
2342                                          OldBinOp->getName(), II));
2343         // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
2344         if (isSplatValue(BO1))
2345           return replaceInstUsesWith(CI,
2346                                      BinaryOperator::CreateWithCopiedFlags(
2347                                          OldBinOp->getOpcode(), X, BO1,
2348                                          OldBinOp, OldBinOp->getName(), II));
2349       }
2350       // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
2351       if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2352                          m_Value(Y))) &&
2353           isSplatValue(BO0))
2354         return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
2355                                            OldBinOp->getOpcode(), BO0, Y,
2356                                            OldBinOp, OldBinOp->getName(), II));
2357     }
2358     // rev(unop rev(X)) --> unop X
2359     if (match(Vec, m_OneUse(m_UnOp(
2360                        m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2361                            m_Value(X)))))) {
2362       auto *OldUnOp = cast<UnaryOperator>(Vec);
2363       auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
2364           OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
2365       return replaceInstUsesWith(CI, NewUnOp);
2366     }
2367     break;
2368   }
2369   case Intrinsic::vector_reduce_or:
2370   case Intrinsic::vector_reduce_and: {
2371     // Canonicalize logical or/and reductions:
2372     // Or reduction for i1 is represented as:
2373     // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2374     // %res = cmp ne iReduxWidth %val, 0
2375     // And reduction for i1 is represented as:
2376     // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2377     // %res = cmp eq iReduxWidth %val, 11111
2378     Value *Arg = II->getArgOperand(0);
2379     Value *Vect;
2380     if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2381       if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2382         if (FTy->getElementType() == Builder.getInt1Ty()) {
2383           Value *Res = Builder.CreateBitCast(
2384               Vect, Builder.getIntNTy(FTy->getNumElements()));
2385           if (IID == Intrinsic::vector_reduce_and) {
2386             Res = Builder.CreateICmpEQ(
2387                 Res, ConstantInt::getAllOnesValue(Res->getType()));
2388           } else {
2389             assert(IID == Intrinsic::vector_reduce_or &&
2390                    "Expected or reduction.");
2391             Res = Builder.CreateIsNotNull(Res);
2392           }
2393           if (Arg != Vect)
2394             Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2395                                      II->getType());
2396           return replaceInstUsesWith(CI, Res);
2397         }
2398     }
2399     LLVM_FALLTHROUGH;
2400   }
2401   case Intrinsic::vector_reduce_add: {
2402     if (IID == Intrinsic::vector_reduce_add) {
2403       // Convert vector_reduce_add(ZExt(<n x i1>)) to
2404       // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
2405       // Convert vector_reduce_add(SExt(<n x i1>)) to
2406       // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
2407       // Convert vector_reduce_add(<n x i1>) to
2408       // Trunc(ctpop(bitcast <n x i1> to in)).
2409       Value *Arg = II->getArgOperand(0);
2410       Value *Vect;
2411       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2412         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2413           if (FTy->getElementType() == Builder.getInt1Ty()) {
2414             Value *V = Builder.CreateBitCast(
2415                 Vect, Builder.getIntNTy(FTy->getNumElements()));
2416             Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
2417             if (Res->getType() != II->getType())
2418               Res = Builder.CreateZExtOrTrunc(Res, II->getType());
2419             if (Arg != Vect &&
2420                 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
2421               Res = Builder.CreateNeg(Res);
2422             return replaceInstUsesWith(CI, Res);
2423           }
2424       }
2425     }
2426     LLVM_FALLTHROUGH;
2427   }
2428   case Intrinsic::vector_reduce_xor: {
2429     if (IID == Intrinsic::vector_reduce_xor) {
2430       // Exclusive disjunction reduction over the vector with
2431       // (potentially-extended) i1 element type is actually a
2432       // (potentially-extended) arithmetic `add` reduction over the original
2433       // non-extended value:
2434       //   vector_reduce_xor(?ext(<n x i1>))
2435       //     -->
2436       //   ?ext(vector_reduce_add(<n x i1>))
2437       Value *Arg = II->getArgOperand(0);
2438       Value *Vect;
2439       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2440         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2441           if (FTy->getElementType() == Builder.getInt1Ty()) {
2442             Value *Res = Builder.CreateAddReduce(Vect);
2443             if (Arg != Vect)
2444               Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2445                                        II->getType());
2446             return replaceInstUsesWith(CI, Res);
2447           }
2448       }
2449     }
2450     LLVM_FALLTHROUGH;
2451   }
2452   case Intrinsic::vector_reduce_mul: {
2453     if (IID == Intrinsic::vector_reduce_mul) {
2454       // Multiplicative reduction over the vector with (potentially-extended)
2455       // i1 element type is actually a (potentially zero-extended)
2456       // logical `and` reduction over the original non-extended value:
2457       //   vector_reduce_mul(?ext(<n x i1>))
2458       //     -->
2459       //   zext(vector_reduce_and(<n x i1>))
2460       Value *Arg = II->getArgOperand(0);
2461       Value *Vect;
2462       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2463         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2464           if (FTy->getElementType() == Builder.getInt1Ty()) {
2465             Value *Res = Builder.CreateAndReduce(Vect);
2466             if (Res->getType() != II->getType())
2467               Res = Builder.CreateZExt(Res, II->getType());
2468             return replaceInstUsesWith(CI, Res);
2469           }
2470       }
2471     }
2472     LLVM_FALLTHROUGH;
2473   }
2474   case Intrinsic::vector_reduce_umin:
2475   case Intrinsic::vector_reduce_umax: {
2476     if (IID == Intrinsic::vector_reduce_umin ||
2477         IID == Intrinsic::vector_reduce_umax) {
2478       // UMin/UMax reduction over the vector with (potentially-extended)
2479       // i1 element type is actually a (potentially-extended)
2480       // logical `and`/`or` reduction over the original non-extended value:
2481       //   vector_reduce_u{min,max}(?ext(<n x i1>))
2482       //     -->
2483       //   ?ext(vector_reduce_{and,or}(<n x i1>))
2484       Value *Arg = II->getArgOperand(0);
2485       Value *Vect;
2486       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2487         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2488           if (FTy->getElementType() == Builder.getInt1Ty()) {
2489             Value *Res = IID == Intrinsic::vector_reduce_umin
2490                              ? Builder.CreateAndReduce(Vect)
2491                              : Builder.CreateOrReduce(Vect);
2492             if (Arg != Vect)
2493               Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2494                                        II->getType());
2495             return replaceInstUsesWith(CI, Res);
2496           }
2497       }
2498     }
2499     LLVM_FALLTHROUGH;
2500   }
2501   case Intrinsic::vector_reduce_smin:
2502   case Intrinsic::vector_reduce_smax: {
2503     if (IID == Intrinsic::vector_reduce_smin ||
2504         IID == Intrinsic::vector_reduce_smax) {
2505       // SMin/SMax reduction over the vector with (potentially-extended)
2506       // i1 element type is actually a (potentially-extended)
2507       // logical `and`/`or` reduction over the original non-extended value:
2508       //   vector_reduce_s{min,max}(<n x i1>)
2509       //     -->
2510       //   vector_reduce_{or,and}(<n x i1>)
2511       // and
2512       //   vector_reduce_s{min,max}(sext(<n x i1>))
2513       //     -->
2514       //   sext(vector_reduce_{or,and}(<n x i1>))
2515       // and
2516       //   vector_reduce_s{min,max}(zext(<n x i1>))
2517       //     -->
2518       //   zext(vector_reduce_{and,or}(<n x i1>))
2519       Value *Arg = II->getArgOperand(0);
2520       Value *Vect;
2521       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2522         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2523           if (FTy->getElementType() == Builder.getInt1Ty()) {
2524             Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
2525             if (Arg != Vect)
2526               ExtOpc = cast<CastInst>(Arg)->getOpcode();
2527             Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
2528                           (ExtOpc == Instruction::CastOps::ZExt))
2529                              ? Builder.CreateAndReduce(Vect)
2530                              : Builder.CreateOrReduce(Vect);
2531             if (Arg != Vect)
2532               Res = Builder.CreateCast(ExtOpc, Res, II->getType());
2533             return replaceInstUsesWith(CI, Res);
2534           }
2535       }
2536     }
2537     LLVM_FALLTHROUGH;
2538   }
2539   case Intrinsic::vector_reduce_fmax:
2540   case Intrinsic::vector_reduce_fmin:
2541   case Intrinsic::vector_reduce_fadd:
2542   case Intrinsic::vector_reduce_fmul: {
2543     bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
2544                               IID != Intrinsic::vector_reduce_fmul) ||
2545                              II->hasAllowReassoc();
2546     const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
2547                              IID == Intrinsic::vector_reduce_fmul)
2548                                 ? 1
2549                                 : 0;
2550     Value *Arg = II->getArgOperand(ArgIdx);
2551     Value *V;
2552     ArrayRef<int> Mask;
2553     if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
2554         !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
2555         !cast<ShuffleVectorInst>(Arg)->isSingleSource())
2556       break;
2557     int Sz = Mask.size();
2558     SmallBitVector UsedIndices(Sz);
2559     for (int Idx : Mask) {
2560       if (Idx == UndefMaskElem || UsedIndices.test(Idx))
2561         break;
2562       UsedIndices.set(Idx);
2563     }
2564     // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
2565     // other changes.
2566     if (UsedIndices.all()) {
2567       replaceUse(II->getOperandUse(ArgIdx), V);
2568       return nullptr;
2569     }
2570     break;
2571   }
2572   default: {
2573     // Handle target specific intrinsics
2574     Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
2575     if (V.hasValue())
2576       return V.getValue();
2577     break;
2578   }
2579   }
2580   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
2581   // context, so it is handled in visitCallBase and we should trigger it.
2582   return visitCallBase(*II);
2583 }
2584 
2585 // Fence instruction simplification
2586 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
2587   auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
2588   // This check is solely here to handle arbitrary target-dependent syncscopes.
2589   // TODO: Can remove if does not matter in practice.
2590   if (NFI && FI.isIdenticalTo(NFI))
2591     return eraseInstFromFunction(FI);
2592 
2593   // Returns true if FI1 is identical or stronger fence than FI2.
2594   auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
2595     auto FI1SyncScope = FI1->getSyncScopeID();
2596     // Consider same scope, where scope is global or single-thread.
2597     if (FI1SyncScope != FI2->getSyncScopeID() ||
2598         (FI1SyncScope != SyncScope::System &&
2599          FI1SyncScope != SyncScope::SingleThread))
2600       return false;
2601 
2602     return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
2603   };
2604   if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
2605     return eraseInstFromFunction(FI);
2606 
2607   if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
2608     if (isIdenticalOrStrongerFence(PFI, &FI))
2609       return eraseInstFromFunction(FI);
2610   return nullptr;
2611 }
2612 
2613 // InvokeInst simplification
2614 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
2615   return visitCallBase(II);
2616 }
2617 
2618 // CallBrInst simplification
2619 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
2620   return visitCallBase(CBI);
2621 }
2622 
2623 /// If this cast does not affect the value passed through the varargs area, we
2624 /// can eliminate the use of the cast.
2625 static bool isSafeToEliminateVarargsCast(const CallBase &Call,
2626                                          const DataLayout &DL,
2627                                          const CastInst *const CI,
2628                                          const int ix) {
2629   if (!CI->isLosslessCast())
2630     return false;
2631 
2632   // If this is a GC intrinsic, avoid munging types.  We need types for
2633   // statepoint reconstruction in SelectionDAG.
2634   // TODO: This is probably something which should be expanded to all
2635   // intrinsics since the entire point of intrinsics is that
2636   // they are understandable by the optimizer.
2637   if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
2638       isa<GCResultInst>(Call))
2639     return false;
2640 
2641   // Opaque pointers are compatible with any byval types.
2642   PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
2643   if (SrcTy->isOpaque())
2644     return true;
2645 
2646   // The size of ByVal or InAlloca arguments is derived from the type, so we
2647   // can't change to a type with a different size.  If the size were
2648   // passed explicitly we could avoid this check.
2649   if (!Call.isPassPointeeByValueArgument(ix))
2650     return true;
2651 
2652   // The transform currently only handles type replacement for byval, not other
2653   // type-carrying attributes.
2654   if (!Call.isByValArgument(ix))
2655     return false;
2656 
2657   Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
2658   Type *DstElemTy = Call.getParamByValType(ix);
2659   if (!SrcElemTy->isSized() || !DstElemTy->isSized())
2660     return false;
2661   if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
2662     return false;
2663   return true;
2664 }
2665 
2666 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
2667   if (!CI->getCalledFunction()) return nullptr;
2668 
2669   // Skip optimizing notail and musttail calls so
2670   // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
2671   // LibCallSimplifier::optimizeCall should try to preseve tail calls though.
2672   if (CI->isMustTailCall() || CI->isNoTailCall())
2673     return nullptr;
2674 
2675   auto InstCombineRAUW = [this](Instruction *From, Value *With) {
2676     replaceInstUsesWith(*From, With);
2677   };
2678   auto InstCombineErase = [this](Instruction *I) {
2679     eraseInstFromFunction(*I);
2680   };
2681   LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
2682                                InstCombineErase);
2683   if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
2684     ++NumSimplified;
2685     return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
2686   }
2687 
2688   return nullptr;
2689 }
2690 
2691 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
2692   // Strip off at most one level of pointer casts, looking for an alloca.  This
2693   // is good enough in practice and simpler than handling any number of casts.
2694   Value *Underlying = TrampMem->stripPointerCasts();
2695   if (Underlying != TrampMem &&
2696       (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
2697     return nullptr;
2698   if (!isa<AllocaInst>(Underlying))
2699     return nullptr;
2700 
2701   IntrinsicInst *InitTrampoline = nullptr;
2702   for (User *U : TrampMem->users()) {
2703     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
2704     if (!II)
2705       return nullptr;
2706     if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
2707       if (InitTrampoline)
2708         // More than one init_trampoline writes to this value.  Give up.
2709         return nullptr;
2710       InitTrampoline = II;
2711       continue;
2712     }
2713     if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
2714       // Allow any number of calls to adjust.trampoline.
2715       continue;
2716     return nullptr;
2717   }
2718 
2719   // No call to init.trampoline found.
2720   if (!InitTrampoline)
2721     return nullptr;
2722 
2723   // Check that the alloca is being used in the expected way.
2724   if (InitTrampoline->getOperand(0) != TrampMem)
2725     return nullptr;
2726 
2727   return InitTrampoline;
2728 }
2729 
2730 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
2731                                                Value *TrampMem) {
2732   // Visit all the previous instructions in the basic block, and try to find a
2733   // init.trampoline which has a direct path to the adjust.trampoline.
2734   for (BasicBlock::iterator I = AdjustTramp->getIterator(),
2735                             E = AdjustTramp->getParent()->begin();
2736        I != E;) {
2737     Instruction *Inst = &*--I;
2738     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2739       if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
2740           II->getOperand(0) == TrampMem)
2741         return II;
2742     if (Inst->mayWriteToMemory())
2743       return nullptr;
2744   }
2745   return nullptr;
2746 }
2747 
2748 // Given a call to llvm.adjust.trampoline, find and return the corresponding
2749 // call to llvm.init.trampoline if the call to the trampoline can be optimized
2750 // to a direct call to a function.  Otherwise return NULL.
2751 static IntrinsicInst *findInitTrampoline(Value *Callee) {
2752   Callee = Callee->stripPointerCasts();
2753   IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
2754   if (!AdjustTramp ||
2755       AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
2756     return nullptr;
2757 
2758   Value *TrampMem = AdjustTramp->getOperand(0);
2759 
2760   if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
2761     return IT;
2762   if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
2763     return IT;
2764   return nullptr;
2765 }
2766 
2767 void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
2768   // Note: We only handle cases which can't be driven from generic attributes
2769   // here.  So, for example, nonnull and noalias (which are common properties
2770   // of some allocation functions) are expected to be handled via annotation
2771   // of the respective allocator declaration with generic attributes.
2772 
2773   uint64_t Size;
2774   ObjectSizeOpts Opts;
2775   if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
2776     // TODO: We really should just emit deref_or_null here and then
2777     // let the generic inference code combine that with nonnull.
2778     if (Call.hasRetAttr(Attribute::NonNull))
2779       Call.addRetAttr(Attribute::getWithDereferenceableBytes(
2780           Call.getContext(), Size));
2781     else
2782       Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
2783           Call.getContext(), Size));
2784   }
2785 
2786   // Add alignment attribute if alignment is a power of two constant.
2787   Value *Alignment = getAllocAlignment(&Call, TLI);
2788   if (!Alignment)
2789     return;
2790 
2791   ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
2792   if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
2793     uint64_t AlignmentVal = AlignOpC->getZExtValue();
2794     if (llvm::isPowerOf2_64(AlignmentVal)) {
2795       Call.removeRetAttr(Attribute::Alignment);
2796       Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
2797                                                   Align(AlignmentVal)));
2798     }
2799   }
2800 }
2801 
2802 /// Improvements for call, callbr and invoke instructions.
2803 Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
2804   if (isAllocationFn(&Call, &TLI))
2805     annotateAnyAllocSite(Call, &TLI);
2806 
2807   bool Changed = false;
2808 
2809   // Mark any parameters that are known to be non-null with the nonnull
2810   // attribute.  This is helpful for inlining calls to functions with null
2811   // checks on their arguments.
2812   SmallVector<unsigned, 4> ArgNos;
2813   unsigned ArgNo = 0;
2814 
2815   for (Value *V : Call.args()) {
2816     if (V->getType()->isPointerTy() &&
2817         !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
2818         isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
2819       ArgNos.push_back(ArgNo);
2820     ArgNo++;
2821   }
2822 
2823   assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
2824 
2825   if (!ArgNos.empty()) {
2826     AttributeList AS = Call.getAttributes();
2827     LLVMContext &Ctx = Call.getContext();
2828     AS = AS.addParamAttribute(Ctx, ArgNos,
2829                               Attribute::get(Ctx, Attribute::NonNull));
2830     Call.setAttributes(AS);
2831     Changed = true;
2832   }
2833 
2834   // If the callee is a pointer to a function, attempt to move any casts to the
2835   // arguments of the call/callbr/invoke.
2836   Value *Callee = Call.getCalledOperand();
2837   Function *CalleeF = dyn_cast<Function>(Callee);
2838   if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
2839       transformConstExprCastCall(Call))
2840     return nullptr;
2841 
2842   if (CalleeF) {
2843     // Remove the convergent attr on calls when the callee is not convergent.
2844     if (Call.isConvergent() && !CalleeF->isConvergent() &&
2845         !CalleeF->isIntrinsic()) {
2846       LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
2847                         << "\n");
2848       Call.setNotConvergent();
2849       return &Call;
2850     }
2851 
2852     // If the call and callee calling conventions don't match, and neither one
2853     // of the calling conventions is compatible with C calling convention
2854     // this call must be unreachable, as the call is undefined.
2855     if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
2856          !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
2857            TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
2858          !(Call.getCallingConv() == llvm::CallingConv::C &&
2859            TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
2860         // Only do this for calls to a function with a body.  A prototype may
2861         // not actually end up matching the implementation's calling conv for a
2862         // variety of reasons (e.g. it may be written in assembly).
2863         !CalleeF->isDeclaration()) {
2864       Instruction *OldCall = &Call;
2865       CreateNonTerminatorUnreachable(OldCall);
2866       // If OldCall does not return void then replaceInstUsesWith poison.
2867       // This allows ValueHandlers and custom metadata to adjust itself.
2868       if (!OldCall->getType()->isVoidTy())
2869         replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
2870       if (isa<CallInst>(OldCall))
2871         return eraseInstFromFunction(*OldCall);
2872 
2873       // We cannot remove an invoke or a callbr, because it would change thexi
2874       // CFG, just change the callee to a null pointer.
2875       cast<CallBase>(OldCall)->setCalledFunction(
2876           CalleeF->getFunctionType(),
2877           Constant::getNullValue(CalleeF->getType()));
2878       return nullptr;
2879     }
2880   }
2881 
2882   // Calling a null function pointer is undefined if a null address isn't
2883   // dereferenceable.
2884   if ((isa<ConstantPointerNull>(Callee) &&
2885        !NullPointerIsDefined(Call.getFunction())) ||
2886       isa<UndefValue>(Callee)) {
2887     // If Call does not return void then replaceInstUsesWith poison.
2888     // This allows ValueHandlers and custom metadata to adjust itself.
2889     if (!Call.getType()->isVoidTy())
2890       replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
2891 
2892     if (Call.isTerminator()) {
2893       // Can't remove an invoke or callbr because we cannot change the CFG.
2894       return nullptr;
2895     }
2896 
2897     // This instruction is not reachable, just remove it.
2898     CreateNonTerminatorUnreachable(&Call);
2899     return eraseInstFromFunction(Call);
2900   }
2901 
2902   if (IntrinsicInst *II = findInitTrampoline(Callee))
2903     return transformCallThroughTrampoline(Call, *II);
2904 
2905   // TODO: Drop this transform once opaque pointer transition is done.
2906   FunctionType *FTy = Call.getFunctionType();
2907   if (FTy->isVarArg()) {
2908     int ix = FTy->getNumParams();
2909     // See if we can optimize any arguments passed through the varargs area of
2910     // the call.
2911     for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
2912          I != E; ++I, ++ix) {
2913       CastInst *CI = dyn_cast<CastInst>(*I);
2914       if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
2915         replaceUse(*I, CI->getOperand(0));
2916 
2917         // Update the byval type to match the pointer type.
2918         // Not necessary for opaque pointers.
2919         PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
2920         if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
2921           Call.removeParamAttr(ix, Attribute::ByVal);
2922           Call.addParamAttr(ix, Attribute::getWithByValType(
2923                                     Call.getContext(),
2924                                     NewTy->getNonOpaquePointerElementType()));
2925         }
2926         Changed = true;
2927       }
2928     }
2929   }
2930 
2931   if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
2932     InlineAsm *IA = cast<InlineAsm>(Callee);
2933     if (!IA->canThrow()) {
2934       // Normal inline asm calls cannot throw - mark them
2935       // 'nounwind'.
2936       Call.setDoesNotThrow();
2937       Changed = true;
2938     }
2939   }
2940 
2941   // Try to optimize the call if possible, we require DataLayout for most of
2942   // this.  None of these calls are seen as possibly dead so go ahead and
2943   // delete the instruction now.
2944   if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
2945     Instruction *I = tryOptimizeCall(CI);
2946     // If we changed something return the result, etc. Otherwise let
2947     // the fallthrough check.
2948     if (I) return eraseInstFromFunction(*I);
2949   }
2950 
2951   if (!Call.use_empty() && !Call.isMustTailCall())
2952     if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
2953       Type *CallTy = Call.getType();
2954       Type *RetArgTy = ReturnedArg->getType();
2955       if (RetArgTy->canLosslesslyBitCastTo(CallTy))
2956         return replaceInstUsesWith(
2957             Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
2958     }
2959 
2960   if (isAllocationFn(&Call, &TLI) &&
2961       isAllocRemovable(&cast<CallBase>(Call), &TLI))
2962     return visitAllocSite(Call);
2963 
2964   // Handle intrinsics which can be used in both call and invoke context.
2965   switch (Call.getIntrinsicID()) {
2966   case Intrinsic::experimental_gc_statepoint: {
2967     GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
2968     SmallPtrSet<Value *, 32> LiveGcValues;
2969     for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
2970       GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
2971 
2972       // Remove the relocation if unused.
2973       if (GCR.use_empty()) {
2974         eraseInstFromFunction(GCR);
2975         continue;
2976       }
2977 
2978       Value *DerivedPtr = GCR.getDerivedPtr();
2979       Value *BasePtr = GCR.getBasePtr();
2980 
2981       // Undef is undef, even after relocation.
2982       if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
2983         replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
2984         eraseInstFromFunction(GCR);
2985         continue;
2986       }
2987 
2988       if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
2989         // The relocation of null will be null for most any collector.
2990         // TODO: provide a hook for this in GCStrategy.  There might be some
2991         // weird collector this property does not hold for.
2992         if (isa<ConstantPointerNull>(DerivedPtr)) {
2993           // Use null-pointer of gc_relocate's type to replace it.
2994           replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
2995           eraseInstFromFunction(GCR);
2996           continue;
2997         }
2998 
2999         // isKnownNonNull -> nonnull attribute
3000         if (!GCR.hasRetAttr(Attribute::NonNull) &&
3001             isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
3002           GCR.addRetAttr(Attribute::NonNull);
3003           // We discovered new fact, re-check users.
3004           Worklist.pushUsersToWorkList(GCR);
3005         }
3006       }
3007 
3008       // If we have two copies of the same pointer in the statepoint argument
3009       // list, canonicalize to one.  This may let us common gc.relocates.
3010       if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
3011           GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
3012         auto *OpIntTy = GCR.getOperand(2)->getType();
3013         GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
3014       }
3015 
3016       // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3017       // Canonicalize on the type from the uses to the defs
3018 
3019       // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3020       LiveGcValues.insert(BasePtr);
3021       LiveGcValues.insert(DerivedPtr);
3022     }
3023     Optional<OperandBundleUse> Bundle =
3024         GCSP.getOperandBundle(LLVMContext::OB_gc_live);
3025     unsigned NumOfGCLives = LiveGcValues.size();
3026     if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
3027       break;
3028     // We can reduce the size of gc live bundle.
3029     DenseMap<Value *, unsigned> Val2Idx;
3030     std::vector<Value *> NewLiveGc;
3031     for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
3032       Value *V = Bundle->Inputs[I];
3033       if (Val2Idx.count(V))
3034         continue;
3035       if (LiveGcValues.count(V)) {
3036         Val2Idx[V] = NewLiveGc.size();
3037         NewLiveGc.push_back(V);
3038       } else
3039         Val2Idx[V] = NumOfGCLives;
3040     }
3041     // Update all gc.relocates
3042     for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3043       GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3044       Value *BasePtr = GCR.getBasePtr();
3045       assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
3046              "Missed live gc for base pointer");
3047       auto *OpIntTy1 = GCR.getOperand(1)->getType();
3048       GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
3049       Value *DerivedPtr = GCR.getDerivedPtr();
3050       assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
3051              "Missed live gc for derived pointer");
3052       auto *OpIntTy2 = GCR.getOperand(2)->getType();
3053       GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
3054     }
3055     // Create new statepoint instruction.
3056     OperandBundleDef NewBundle("gc-live", NewLiveGc);
3057     return CallBase::Create(&Call, NewBundle);
3058   }
3059   default: { break; }
3060   }
3061 
3062   return Changed ? &Call : nullptr;
3063 }
3064 
3065 /// If the callee is a constexpr cast of a function, attempt to move the cast to
3066 /// the arguments of the call/callbr/invoke.
3067 bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
3068   auto *Callee =
3069       dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
3070   if (!Callee)
3071     return false;
3072 
3073   // If this is a call to a thunk function, don't remove the cast. Thunks are
3074   // used to transparently forward all incoming parameters and outgoing return
3075   // values, so it's important to leave the cast in place.
3076   if (Callee->hasFnAttribute("thunk"))
3077     return false;
3078 
3079   // If this is a musttail call, the callee's prototype must match the caller's
3080   // prototype with the exception of pointee types. The code below doesn't
3081   // implement that, so we can't do this transform.
3082   // TODO: Do the transform if it only requires adding pointer casts.
3083   if (Call.isMustTailCall())
3084     return false;
3085 
3086   Instruction *Caller = &Call;
3087   const AttributeList &CallerPAL = Call.getAttributes();
3088 
3089   // Okay, this is a cast from a function to a different type.  Unless doing so
3090   // would cause a type conversion of one of our arguments, change this call to
3091   // be a direct call with arguments casted to the appropriate types.
3092   FunctionType *FT = Callee->getFunctionType();
3093   Type *OldRetTy = Caller->getType();
3094   Type *NewRetTy = FT->getReturnType();
3095 
3096   // Check to see if we are changing the return type...
3097   if (OldRetTy != NewRetTy) {
3098 
3099     if (NewRetTy->isStructTy())
3100       return false; // TODO: Handle multiple return values.
3101 
3102     if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
3103       if (Callee->isDeclaration())
3104         return false;   // Cannot transform this return value.
3105 
3106       if (!Caller->use_empty() &&
3107           // void -> non-void is handled specially
3108           !NewRetTy->isVoidTy())
3109         return false;   // Cannot transform this return value.
3110     }
3111 
3112     if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
3113       AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3114       if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
3115         return false;   // Attribute not compatible with transformed value.
3116     }
3117 
3118     // If the callbase is an invoke/callbr instruction, and the return value is
3119     // used by a PHI node in a successor, we cannot change the return type of
3120     // the call because there is no place to put the cast instruction (without
3121     // breaking the critical edge).  Bail out in this case.
3122     if (!Caller->use_empty()) {
3123       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
3124         for (User *U : II->users())
3125           if (PHINode *PN = dyn_cast<PHINode>(U))
3126             if (PN->getParent() == II->getNormalDest() ||
3127                 PN->getParent() == II->getUnwindDest())
3128               return false;
3129       // FIXME: Be conservative for callbr to avoid a quadratic search.
3130       if (isa<CallBrInst>(Caller))
3131         return false;
3132     }
3133   }
3134 
3135   unsigned NumActualArgs = Call.arg_size();
3136   unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
3137 
3138   // Prevent us turning:
3139   // declare void @takes_i32_inalloca(i32* inalloca)
3140   //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
3141   //
3142   // into:
3143   //  call void @takes_i32_inalloca(i32* null)
3144   //
3145   //  Similarly, avoid folding away bitcasts of byval calls.
3146   if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
3147       Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
3148     return false;
3149 
3150   auto AI = Call.arg_begin();
3151   for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
3152     Type *ParamTy = FT->getParamType(i);
3153     Type *ActTy = (*AI)->getType();
3154 
3155     if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
3156       return false;   // Cannot transform this parameter value.
3157 
3158     if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
3159             .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
3160       return false;   // Attribute not compatible with transformed value.
3161 
3162     if (Call.isInAllocaArgument(i))
3163       return false;   // Cannot transform to and from inalloca.
3164 
3165     if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
3166       return false;
3167 
3168     // If the parameter is passed as a byval argument, then we have to have a
3169     // sized type and the sized type has to have the same size as the old type.
3170     if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
3171       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
3172       if (!ParamPTy)
3173         return false;
3174 
3175       if (!ParamPTy->isOpaque()) {
3176         Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
3177         if (!ParamElTy->isSized())
3178           return false;
3179 
3180         Type *CurElTy = Call.getParamByValType(i);
3181         if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
3182           return false;
3183       }
3184     }
3185   }
3186 
3187   if (Callee->isDeclaration()) {
3188     // Do not delete arguments unless we have a function body.
3189     if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
3190       return false;
3191 
3192     // If the callee is just a declaration, don't change the varargsness of the
3193     // call.  We don't want to introduce a varargs call where one doesn't
3194     // already exist.
3195     if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
3196       return false;
3197 
3198     // If both the callee and the cast type are varargs, we still have to make
3199     // sure the number of fixed parameters are the same or we have the same
3200     // ABI issues as if we introduce a varargs call.
3201     if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
3202         FT->getNumParams() != Call.getFunctionType()->getNumParams())
3203       return false;
3204   }
3205 
3206   if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
3207       !CallerPAL.isEmpty()) {
3208     // In this case we have more arguments than the new function type, but we
3209     // won't be dropping them.  Check that these extra arguments have attributes
3210     // that are compatible with being a vararg call argument.
3211     unsigned SRetIdx;
3212     if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
3213         SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
3214       return false;
3215   }
3216 
3217   // Okay, we decided that this is a safe thing to do: go ahead and start
3218   // inserting cast instructions as necessary.
3219   SmallVector<Value *, 8> Args;
3220   SmallVector<AttributeSet, 8> ArgAttrs;
3221   Args.reserve(NumActualArgs);
3222   ArgAttrs.reserve(NumActualArgs);
3223 
3224   // Get any return attributes.
3225   AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3226 
3227   // If the return value is not being used, the type may not be compatible
3228   // with the existing attributes.  Wipe out any problematic attributes.
3229   RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
3230 
3231   LLVMContext &Ctx = Call.getContext();
3232   AI = Call.arg_begin();
3233   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
3234     Type *ParamTy = FT->getParamType(i);
3235 
3236     Value *NewArg = *AI;
3237     if ((*AI)->getType() != ParamTy)
3238       NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
3239     Args.push_back(NewArg);
3240 
3241     // Add any parameter attributes.
3242     if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
3243         !ParamTy->isOpaquePointerTy()) {
3244       AttrBuilder AB(FT->getContext(), CallerPAL.getParamAttrs(i));
3245       AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
3246       ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
3247     } else
3248       ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
3249   }
3250 
3251   // If the function takes more arguments than the call was taking, add them
3252   // now.
3253   for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
3254     Args.push_back(Constant::getNullValue(FT->getParamType(i)));
3255     ArgAttrs.push_back(AttributeSet());
3256   }
3257 
3258   // If we are removing arguments to the function, emit an obnoxious warning.
3259   if (FT->getNumParams() < NumActualArgs) {
3260     // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
3261     if (FT->isVarArg()) {
3262       // Add all of the arguments in their promoted form to the arg list.
3263       for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
3264         Type *PTy = getPromotedType((*AI)->getType());
3265         Value *NewArg = *AI;
3266         if (PTy != (*AI)->getType()) {
3267           // Must promote to pass through va_arg area!
3268           Instruction::CastOps opcode =
3269             CastInst::getCastOpcode(*AI, false, PTy, false);
3270           NewArg = Builder.CreateCast(opcode, *AI, PTy);
3271         }
3272         Args.push_back(NewArg);
3273 
3274         // Add any parameter attributes.
3275         ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
3276       }
3277     }
3278   }
3279 
3280   AttributeSet FnAttrs = CallerPAL.getFnAttrs();
3281 
3282   if (NewRetTy->isVoidTy())
3283     Caller->setName("");   // Void type should not have a name.
3284 
3285   assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
3286          "missing argument attributes");
3287   AttributeList NewCallerPAL = AttributeList::get(
3288       Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
3289 
3290   SmallVector<OperandBundleDef, 1> OpBundles;
3291   Call.getOperandBundlesAsDefs(OpBundles);
3292 
3293   CallBase *NewCall;
3294   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3295     NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
3296                                    II->getUnwindDest(), Args, OpBundles);
3297   } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
3298     NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
3299                                    CBI->getIndirectDests(), Args, OpBundles);
3300   } else {
3301     NewCall = Builder.CreateCall(Callee, Args, OpBundles);
3302     cast<CallInst>(NewCall)->setTailCallKind(
3303         cast<CallInst>(Caller)->getTailCallKind());
3304   }
3305   NewCall->takeName(Caller);
3306   NewCall->setCallingConv(Call.getCallingConv());
3307   NewCall->setAttributes(NewCallerPAL);
3308 
3309   // Preserve prof metadata if any.
3310   NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
3311 
3312   // Insert a cast of the return type as necessary.
3313   Instruction *NC = NewCall;
3314   Value *NV = NC;
3315   if (OldRetTy != NV->getType() && !Caller->use_empty()) {
3316     if (!NV->getType()->isVoidTy()) {
3317       NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
3318       NC->setDebugLoc(Caller->getDebugLoc());
3319 
3320       // If this is an invoke/callbr instruction, we should insert it after the
3321       // first non-phi instruction in the normal successor block.
3322       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3323         BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
3324         InsertNewInstBefore(NC, *I);
3325       } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
3326         BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
3327         InsertNewInstBefore(NC, *I);
3328       } else {
3329         // Otherwise, it's a call, just insert cast right after the call.
3330         InsertNewInstBefore(NC, *Caller);
3331       }
3332       Worklist.pushUsersToWorkList(*Caller);
3333     } else {
3334       NV = UndefValue::get(Caller->getType());
3335     }
3336   }
3337 
3338   if (!Caller->use_empty())
3339     replaceInstUsesWith(*Caller, NV);
3340   else if (Caller->hasValueHandle()) {
3341     if (OldRetTy == NV->getType())
3342       ValueHandleBase::ValueIsRAUWd(Caller, NV);
3343     else
3344       // We cannot call ValueIsRAUWd with a different type, and the
3345       // actual tracked value will disappear.
3346       ValueHandleBase::ValueIsDeleted(Caller);
3347   }
3348 
3349   eraseInstFromFunction(*Caller);
3350   return true;
3351 }
3352 
3353 /// Turn a call to a function created by init_trampoline / adjust_trampoline
3354 /// intrinsic pair into a direct call to the underlying function.
3355 Instruction *
3356 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
3357                                                  IntrinsicInst &Tramp) {
3358   Value *Callee = Call.getCalledOperand();
3359   Type *CalleeTy = Callee->getType();
3360   FunctionType *FTy = Call.getFunctionType();
3361   AttributeList Attrs = Call.getAttributes();
3362 
3363   // If the call already has the 'nest' attribute somewhere then give up -
3364   // otherwise 'nest' would occur twice after splicing in the chain.
3365   if (Attrs.hasAttrSomewhere(Attribute::Nest))
3366     return nullptr;
3367 
3368   Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
3369   FunctionType *NestFTy = NestF->getFunctionType();
3370 
3371   AttributeList NestAttrs = NestF->getAttributes();
3372   if (!NestAttrs.isEmpty()) {
3373     unsigned NestArgNo = 0;
3374     Type *NestTy = nullptr;
3375     AttributeSet NestAttr;
3376 
3377     // Look for a parameter marked with the 'nest' attribute.
3378     for (FunctionType::param_iterator I = NestFTy->param_begin(),
3379                                       E = NestFTy->param_end();
3380          I != E; ++NestArgNo, ++I) {
3381       AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
3382       if (AS.hasAttribute(Attribute::Nest)) {
3383         // Record the parameter type and any other attributes.
3384         NestTy = *I;
3385         NestAttr = AS;
3386         break;
3387       }
3388     }
3389 
3390     if (NestTy) {
3391       std::vector<Value*> NewArgs;
3392       std::vector<AttributeSet> NewArgAttrs;
3393       NewArgs.reserve(Call.arg_size() + 1);
3394       NewArgAttrs.reserve(Call.arg_size());
3395 
3396       // Insert the nest argument into the call argument list, which may
3397       // mean appending it.  Likewise for attributes.
3398 
3399       {
3400         unsigned ArgNo = 0;
3401         auto I = Call.arg_begin(), E = Call.arg_end();
3402         do {
3403           if (ArgNo == NestArgNo) {
3404             // Add the chain argument and attributes.
3405             Value *NestVal = Tramp.getArgOperand(2);
3406             if (NestVal->getType() != NestTy)
3407               NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
3408             NewArgs.push_back(NestVal);
3409             NewArgAttrs.push_back(NestAttr);
3410           }
3411 
3412           if (I == E)
3413             break;
3414 
3415           // Add the original argument and attributes.
3416           NewArgs.push_back(*I);
3417           NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
3418 
3419           ++ArgNo;
3420           ++I;
3421         } while (true);
3422       }
3423 
3424       // The trampoline may have been bitcast to a bogus type (FTy).
3425       // Handle this by synthesizing a new function type, equal to FTy
3426       // with the chain parameter inserted.
3427 
3428       std::vector<Type*> NewTypes;
3429       NewTypes.reserve(FTy->getNumParams()+1);
3430 
3431       // Insert the chain's type into the list of parameter types, which may
3432       // mean appending it.
3433       {
3434         unsigned ArgNo = 0;
3435         FunctionType::param_iterator I = FTy->param_begin(),
3436           E = FTy->param_end();
3437 
3438         do {
3439           if (ArgNo == NestArgNo)
3440             // Add the chain's type.
3441             NewTypes.push_back(NestTy);
3442 
3443           if (I == E)
3444             break;
3445 
3446           // Add the original type.
3447           NewTypes.push_back(*I);
3448 
3449           ++ArgNo;
3450           ++I;
3451         } while (true);
3452       }
3453 
3454       // Replace the trampoline call with a direct call.  Let the generic
3455       // code sort out any function type mismatches.
3456       FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
3457                                                 FTy->isVarArg());
3458       Constant *NewCallee =
3459         NestF->getType() == PointerType::getUnqual(NewFTy) ?
3460         NestF : ConstantExpr::getBitCast(NestF,
3461                                          PointerType::getUnqual(NewFTy));
3462       AttributeList NewPAL =
3463           AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
3464                              Attrs.getRetAttrs(), NewArgAttrs);
3465 
3466       SmallVector<OperandBundleDef, 1> OpBundles;
3467       Call.getOperandBundlesAsDefs(OpBundles);
3468 
3469       Instruction *NewCaller;
3470       if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
3471         NewCaller = InvokeInst::Create(NewFTy, NewCallee,
3472                                        II->getNormalDest(), II->getUnwindDest(),
3473                                        NewArgs, OpBundles);
3474         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
3475         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
3476       } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
3477         NewCaller =
3478             CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
3479                                CBI->getIndirectDests(), NewArgs, OpBundles);
3480         cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
3481         cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
3482       } else {
3483         NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
3484         cast<CallInst>(NewCaller)->setTailCallKind(
3485             cast<CallInst>(Call).getTailCallKind());
3486         cast<CallInst>(NewCaller)->setCallingConv(
3487             cast<CallInst>(Call).getCallingConv());
3488         cast<CallInst>(NewCaller)->setAttributes(NewPAL);
3489       }
3490       NewCaller->setDebugLoc(Call.getDebugLoc());
3491 
3492       return NewCaller;
3493     }
3494   }
3495 
3496   // Replace the trampoline call with a direct call.  Since there is no 'nest'
3497   // parameter, there is no need to adjust the argument list.  Let the generic
3498   // code sort out any function type mismatches.
3499   Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
3500   Call.setCalledFunction(FTy, NewCallee);
3501   return &Call;
3502 }
3503