1 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the library calls simplifier. It does not implement
10 // any pass, but can't be used by other passes to do simplifications.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Analysis/ConstantFolding.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Transforms/Utils/Local.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/Analysis/CaptureTracking.h"
25 #include "llvm/Analysis/Loads.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Transforms/Utils/BuildLibCalls.h"
37 
38 using namespace llvm;
39 using namespace PatternMatch;
40 
41 static cl::opt<bool>
42     EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
43                          cl::init(false),
44                          cl::desc("Enable unsafe double to float "
45                                   "shrinking for math lib calls"));
46 
47 
48 //===----------------------------------------------------------------------===//
49 // Helper Functions
50 //===----------------------------------------------------------------------===//
51 
52 static bool ignoreCallingConv(LibFunc Func) {
53   return Func == LibFunc_abs || Func == LibFunc_labs ||
54          Func == LibFunc_llabs || Func == LibFunc_strlen;
55 }
56 
57 static bool isCallingConvCCompatible(CallInst *CI) {
58   switch(CI->getCallingConv()) {
59   default:
60     return false;
61   case llvm::CallingConv::C:
62     return true;
63   case llvm::CallingConv::ARM_APCS:
64   case llvm::CallingConv::ARM_AAPCS:
65   case llvm::CallingConv::ARM_AAPCS_VFP: {
66 
67     // The iOS ABI diverges from the standard in some cases, so for now don't
68     // try to simplify those calls.
69     if (Triple(CI->getModule()->getTargetTriple()).isiOS())
70       return false;
71 
72     auto *FuncTy = CI->getFunctionType();
73 
74     if (!FuncTy->getReturnType()->isPointerTy() &&
75         !FuncTy->getReturnType()->isIntegerTy() &&
76         !FuncTy->getReturnType()->isVoidTy())
77       return false;
78 
79     for (auto Param : FuncTy->params()) {
80       if (!Param->isPointerTy() && !Param->isIntegerTy())
81         return false;
82     }
83     return true;
84   }
85   }
86   return false;
87 }
88 
89 /// Return true if it is only used in equality comparisons with With.
90 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
91   for (User *U : V->users()) {
92     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
93       if (IC->isEquality() && IC->getOperand(1) == With)
94         continue;
95     // Unknown instruction.
96     return false;
97   }
98   return true;
99 }
100 
101 static bool callHasFloatingPointArgument(const CallInst *CI) {
102   return any_of(CI->operands(), [](const Use &OI) {
103     return OI->getType()->isFloatingPointTy();
104   });
105 }
106 
107 static bool callHasFP128Argument(const CallInst *CI) {
108   return any_of(CI->operands(), [](const Use &OI) {
109     return OI->getType()->isFP128Ty();
110   });
111 }
112 
113 static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
114   if (Base < 2 || Base > 36)
115     // handle special zero base
116     if (Base != 0)
117       return nullptr;
118 
119   char *End;
120   std::string nptr = Str.str();
121   errno = 0;
122   long long int Result = strtoll(nptr.c_str(), &End, Base);
123   if (errno)
124     return nullptr;
125 
126   // if we assume all possible target locales are ASCII supersets,
127   // then if strtoll successfully parses a number on the host,
128   // it will also successfully parse the same way on the target
129   if (*End != '\0')
130     return nullptr;
131 
132   if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
133     return nullptr;
134 
135   return ConstantInt::get(CI->getType(), Result);
136 }
137 
138 static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
139                                 const TargetLibraryInfo *TLI) {
140   CallInst *FOpen = dyn_cast<CallInst>(File);
141   if (!FOpen)
142     return false;
143 
144   Function *InnerCallee = FOpen->getCalledFunction();
145   if (!InnerCallee)
146     return false;
147 
148   LibFunc Func;
149   if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
150       Func != LibFunc_fopen)
151     return false;
152 
153   inferLibFuncAttributes(*CI->getCalledFunction(), *TLI);
154   if (PointerMayBeCaptured(File, true, true))
155     return false;
156 
157   return true;
158 }
159 
160 static bool isOnlyUsedInComparisonWithZero(Value *V) {
161   for (User *U : V->users()) {
162     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
163       if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
164         if (C->isNullValue())
165           continue;
166     // Unknown instruction.
167     return false;
168   }
169   return true;
170 }
171 
172 static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
173                                  const DataLayout &DL) {
174   if (!isOnlyUsedInComparisonWithZero(CI))
175     return false;
176 
177   if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
178     return false;
179 
180   if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
181     return false;
182 
183   return true;
184 }
185 
186 //===----------------------------------------------------------------------===//
187 // String and Memory Library Call Optimizations
188 //===----------------------------------------------------------------------===//
189 
190 Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
191   // Extract some information from the instruction
192   Value *Dst = CI->getArgOperand(0);
193   Value *Src = CI->getArgOperand(1);
194 
195   // See if we can get the length of the input string.
196   uint64_t Len = GetStringLength(Src);
197   if (Len == 0)
198     return nullptr;
199   --Len; // Unbias length.
200 
201   // Handle the simple, do-nothing case: strcat(x, "") -> x
202   if (Len == 0)
203     return Dst;
204 
205   return emitStrLenMemCpy(Src, Dst, Len, B);
206 }
207 
208 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
209                                            IRBuilder<> &B) {
210   // We need to find the end of the destination string.  That's where the
211   // memory is to be moved to. We just generate a call to strlen.
212   Value *DstLen = emitStrLen(Dst, B, DL, TLI);
213   if (!DstLen)
214     return nullptr;
215 
216   // Now that we have the destination's length, we must index into the
217   // destination's pointer to get the actual memcpy destination (end of
218   // the string .. we're concatenating).
219   Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
220 
221   // We have enough information to now generate the memcpy call to do the
222   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
223   B.CreateMemCpy(CpyDst, 1, Src, 1,
224                  ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
225   return Dst;
226 }
227 
228 Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
229   // Extract some information from the instruction.
230   Value *Dst = CI->getArgOperand(0);
231   Value *Src = CI->getArgOperand(1);
232   uint64_t Len;
233 
234   // We don't do anything if length is not constant.
235   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
236     Len = LengthArg->getZExtValue();
237   else
238     return nullptr;
239 
240   // See if we can get the length of the input string.
241   uint64_t SrcLen = GetStringLength(Src);
242   if (SrcLen == 0)
243     return nullptr;
244   --SrcLen; // Unbias length.
245 
246   // Handle the simple, do-nothing cases:
247   // strncat(x, "", c) -> x
248   // strncat(x,  c, 0) -> x
249   if (SrcLen == 0 || Len == 0)
250     return Dst;
251 
252   // We don't optimize this case.
253   if (Len < SrcLen)
254     return nullptr;
255 
256   // strncat(x, s, c) -> strcat(x, s)
257   // s is constant so the strcat can be optimized further.
258   return emitStrLenMemCpy(Src, Dst, SrcLen, B);
259 }
260 
261 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
262   Function *Callee = CI->getCalledFunction();
263   FunctionType *FT = Callee->getFunctionType();
264   Value *SrcStr = CI->getArgOperand(0);
265 
266   // If the second operand is non-constant, see if we can compute the length
267   // of the input string and turn this into memchr.
268   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
269   if (!CharC) {
270     uint64_t Len = GetStringLength(SrcStr);
271     if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
272       return nullptr;
273 
274     return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
275                       ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
276                       B, DL, TLI);
277   }
278 
279   // Otherwise, the character is a constant, see if the first argument is
280   // a string literal.  If so, we can constant fold.
281   StringRef Str;
282   if (!getConstantStringInfo(SrcStr, Str)) {
283     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
284       return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
285                          "strchr");
286     return nullptr;
287   }
288 
289   // Compute the offset, make sure to handle the case when we're searching for
290   // zero (a weird way to spell strlen).
291   size_t I = (0xFF & CharC->getSExtValue()) == 0
292                  ? Str.size()
293                  : Str.find(CharC->getSExtValue());
294   if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
295     return Constant::getNullValue(CI->getType());
296 
297   // strchr(s+n,c)  -> gep(s+n+i,c)
298   return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
299 }
300 
301 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
302   Value *SrcStr = CI->getArgOperand(0);
303   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
304 
305   // Cannot fold anything if we're not looking for a constant.
306   if (!CharC)
307     return nullptr;
308 
309   StringRef Str;
310   if (!getConstantStringInfo(SrcStr, Str)) {
311     // strrchr(s, 0) -> strchr(s, 0)
312     if (CharC->isZero())
313       return emitStrChr(SrcStr, '\0', B, TLI);
314     return nullptr;
315   }
316 
317   // Compute the offset.
318   size_t I = (0xFF & CharC->getSExtValue()) == 0
319                  ? Str.size()
320                  : Str.rfind(CharC->getSExtValue());
321   if (I == StringRef::npos) // Didn't find the char. Return null.
322     return Constant::getNullValue(CI->getType());
323 
324   // strrchr(s+n,c) -> gep(s+n+i,c)
325   return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
326 }
327 
328 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
329   Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
330   if (Str1P == Str2P) // strcmp(x,x)  -> 0
331     return ConstantInt::get(CI->getType(), 0);
332 
333   StringRef Str1, Str2;
334   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
335   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
336 
337   // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
338   if (HasStr1 && HasStr2)
339     return ConstantInt::get(CI->getType(), Str1.compare(Str2));
340 
341   if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
342     return B.CreateNeg(B.CreateZExt(
343         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
344 
345   if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
346     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
347                         CI->getType());
348 
349   // strcmp(P, "x") -> memcmp(P, "x", 2)
350   uint64_t Len1 = GetStringLength(Str1P);
351   uint64_t Len2 = GetStringLength(Str2P);
352   if (Len1 && Len2) {
353     return emitMemCmp(Str1P, Str2P,
354                       ConstantInt::get(DL.getIntPtrType(CI->getContext()),
355                                        std::min(Len1, Len2)),
356                       B, DL, TLI);
357   }
358 
359   // strcmp to memcmp
360   if (!HasStr1 && HasStr2) {
361     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
362       return emitMemCmp(
363           Str1P, Str2P,
364           ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
365           TLI);
366   } else if (HasStr1 && !HasStr2) {
367     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
368       return emitMemCmp(
369           Str1P, Str2P,
370           ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
371           TLI);
372   }
373 
374   return nullptr;
375 }
376 
377 Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
378   Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
379   if (Str1P == Str2P) // strncmp(x,x,n)  -> 0
380     return ConstantInt::get(CI->getType(), 0);
381 
382   // Get the length argument if it is constant.
383   uint64_t Length;
384   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
385     Length = LengthArg->getZExtValue();
386   else
387     return nullptr;
388 
389   if (Length == 0) // strncmp(x,y,0)   -> 0
390     return ConstantInt::get(CI->getType(), 0);
391 
392   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
393     return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
394 
395   StringRef Str1, Str2;
396   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
397   bool HasStr2 = getConstantStringInfo(Str2P, Str2);
398 
399   // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
400   if (HasStr1 && HasStr2) {
401     StringRef SubStr1 = Str1.substr(0, Length);
402     StringRef SubStr2 = Str2.substr(0, Length);
403     return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
404   }
405 
406   if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
407     return B.CreateNeg(B.CreateZExt(
408         B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
409 
410   if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
411     return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
412                         CI->getType());
413 
414   uint64_t Len1 = GetStringLength(Str1P);
415   uint64_t Len2 = GetStringLength(Str2P);
416 
417   // strncmp to memcmp
418   if (!HasStr1 && HasStr2) {
419     Len2 = std::min(Len2, Length);
420     if (canTransformToMemCmp(CI, Str1P, Len2, DL))
421       return emitMemCmp(
422           Str1P, Str2P,
423           ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
424           TLI);
425   } else if (HasStr1 && !HasStr2) {
426     Len1 = std::min(Len1, Length);
427     if (canTransformToMemCmp(CI, Str2P, Len1, DL))
428       return emitMemCmp(
429           Str1P, Str2P,
430           ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
431           TLI);
432   }
433 
434   return nullptr;
435 }
436 
437 Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
438   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
439   if (Dst == Src) // strcpy(x,x)  -> x
440     return Src;
441 
442   // See if we can get the length of the input string.
443   uint64_t Len = GetStringLength(Src);
444   if (Len == 0)
445     return nullptr;
446 
447   // We have enough information to now generate the memcpy call to do the
448   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
449   B.CreateMemCpy(Dst, 1, Src, 1,
450                  ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
451   return Dst;
452 }
453 
454 Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
455   Function *Callee = CI->getCalledFunction();
456   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
457   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
458     Value *StrLen = emitStrLen(Src, B, DL, TLI);
459     return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
460   }
461 
462   // See if we can get the length of the input string.
463   uint64_t Len = GetStringLength(Src);
464   if (Len == 0)
465     return nullptr;
466 
467   Type *PT = Callee->getFunctionType()->getParamType(0);
468   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
469   Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
470                               ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
471 
472   // We have enough information to now generate the memcpy call to do the
473   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
474   B.CreateMemCpy(Dst, 1, Src, 1, LenV);
475   return DstEnd;
476 }
477 
478 Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
479   Function *Callee = CI->getCalledFunction();
480   Value *Dst = CI->getArgOperand(0);
481   Value *Src = CI->getArgOperand(1);
482   Value *LenOp = CI->getArgOperand(2);
483 
484   // See if we can get the length of the input string.
485   uint64_t SrcLen = GetStringLength(Src);
486   if (SrcLen == 0)
487     return nullptr;
488   --SrcLen;
489 
490   if (SrcLen == 0) {
491     // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
492     B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
493     return Dst;
494   }
495 
496   uint64_t Len;
497   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
498     Len = LengthArg->getZExtValue();
499   else
500     return nullptr;
501 
502   if (Len == 0)
503     return Dst; // strncpy(x, y, 0) -> x
504 
505   // Let strncpy handle the zero padding
506   if (Len > SrcLen + 1)
507     return nullptr;
508 
509   Type *PT = Callee->getFunctionType()->getParamType(0);
510   // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
511   B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
512 
513   return Dst;
514 }
515 
516 Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
517                                                unsigned CharSize) {
518   Value *Src = CI->getArgOperand(0);
519 
520   // Constant folding: strlen("xyz") -> 3
521   if (uint64_t Len = GetStringLength(Src, CharSize))
522     return ConstantInt::get(CI->getType(), Len - 1);
523 
524   // If s is a constant pointer pointing to a string literal, we can fold
525   // strlen(s + x) to strlen(s) - x, when x is known to be in the range
526   // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
527   // We only try to simplify strlen when the pointer s points to an array
528   // of i8. Otherwise, we would need to scale the offset x before doing the
529   // subtraction. This will make the optimization more complex, and it's not
530   // very useful because calling strlen for a pointer of other types is
531   // very uncommon.
532   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
533     if (!isGEPBasedOnPointerToString(GEP, CharSize))
534       return nullptr;
535 
536     ConstantDataArraySlice Slice;
537     if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
538       uint64_t NullTermIdx;
539       if (Slice.Array == nullptr) {
540         NullTermIdx = 0;
541       } else {
542         NullTermIdx = ~((uint64_t)0);
543         for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
544           if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
545             NullTermIdx = I;
546             break;
547           }
548         }
549         // If the string does not have '\0', leave it to strlen to compute
550         // its length.
551         if (NullTermIdx == ~((uint64_t)0))
552           return nullptr;
553       }
554 
555       Value *Offset = GEP->getOperand(2);
556       KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
557       Known.Zero.flipAllBits();
558       uint64_t ArrSize =
559              cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
560 
561       // KnownZero's bits are flipped, so zeros in KnownZero now represent
562       // bits known to be zeros in Offset, and ones in KnowZero represent
563       // bits unknown in Offset. Therefore, Offset is known to be in range
564       // [0, NullTermIdx] when the flipped KnownZero is non-negative and
565       // unsigned-less-than NullTermIdx.
566       //
567       // If Offset is not provably in the range [0, NullTermIdx], we can still
568       // optimize if we can prove that the program has undefined behavior when
569       // Offset is outside that range. That is the case when GEP->getOperand(0)
570       // is a pointer to an object whose memory extent is NullTermIdx+1.
571       if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
572           (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
573            NullTermIdx == ArrSize - 1)) {
574         Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
575         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
576                            Offset);
577       }
578     }
579 
580     return nullptr;
581   }
582 
583   // strlen(x?"foo":"bars") --> x ? 3 : 4
584   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
585     uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
586     uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
587     if (LenTrue && LenFalse) {
588       ORE.emit([&]() {
589         return OptimizationRemark("instcombine", "simplify-libcalls", CI)
590                << "folded strlen(select) to select of constants";
591       });
592       return B.CreateSelect(SI->getCondition(),
593                             ConstantInt::get(CI->getType(), LenTrue - 1),
594                             ConstantInt::get(CI->getType(), LenFalse - 1));
595     }
596   }
597 
598   // strlen(x) != 0 --> *x != 0
599   // strlen(x) == 0 --> *x == 0
600   if (isOnlyUsedInZeroEqualityComparison(CI))
601     return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
602                         CI->getType());
603 
604   return nullptr;
605 }
606 
607 Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
608   return optimizeStringLength(CI, B, 8);
609 }
610 
611 Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
612   Module &M = *CI->getModule();
613   unsigned WCharSize = TLI->getWCharSize(M) * 8;
614   // We cannot perform this optimization without wchar_size metadata.
615   if (WCharSize == 0)
616     return nullptr;
617 
618   return optimizeStringLength(CI, B, WCharSize);
619 }
620 
621 Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
622   StringRef S1, S2;
623   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
624   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
625 
626   // strpbrk(s, "") -> nullptr
627   // strpbrk("", s) -> nullptr
628   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
629     return Constant::getNullValue(CI->getType());
630 
631   // Constant folding.
632   if (HasS1 && HasS2) {
633     size_t I = S1.find_first_of(S2);
634     if (I == StringRef::npos) // No match.
635       return Constant::getNullValue(CI->getType());
636 
637     return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
638                        "strpbrk");
639   }
640 
641   // strpbrk(s, "a") -> strchr(s, 'a')
642   if (HasS2 && S2.size() == 1)
643     return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
644 
645   return nullptr;
646 }
647 
648 Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
649   Value *EndPtr = CI->getArgOperand(1);
650   if (isa<ConstantPointerNull>(EndPtr)) {
651     // With a null EndPtr, this function won't capture the main argument.
652     // It would be readonly too, except that it still may write to errno.
653     CI->addParamAttr(0, Attribute::NoCapture);
654   }
655 
656   return nullptr;
657 }
658 
659 Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
660   StringRef S1, S2;
661   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
662   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
663 
664   // strspn(s, "") -> 0
665   // strspn("", s) -> 0
666   if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
667     return Constant::getNullValue(CI->getType());
668 
669   // Constant folding.
670   if (HasS1 && HasS2) {
671     size_t Pos = S1.find_first_not_of(S2);
672     if (Pos == StringRef::npos)
673       Pos = S1.size();
674     return ConstantInt::get(CI->getType(), Pos);
675   }
676 
677   return nullptr;
678 }
679 
680 Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
681   StringRef S1, S2;
682   bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
683   bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
684 
685   // strcspn("", s) -> 0
686   if (HasS1 && S1.empty())
687     return Constant::getNullValue(CI->getType());
688 
689   // Constant folding.
690   if (HasS1 && HasS2) {
691     size_t Pos = S1.find_first_of(S2);
692     if (Pos == StringRef::npos)
693       Pos = S1.size();
694     return ConstantInt::get(CI->getType(), Pos);
695   }
696 
697   // strcspn(s, "") -> strlen(s)
698   if (HasS2 && S2.empty())
699     return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
700 
701   return nullptr;
702 }
703 
704 Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
705   // fold strstr(x, x) -> x.
706   if (CI->getArgOperand(0) == CI->getArgOperand(1))
707     return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
708 
709   // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
710   if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
711     Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
712     if (!StrLen)
713       return nullptr;
714     Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
715                                  StrLen, B, DL, TLI);
716     if (!StrNCmp)
717       return nullptr;
718     for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
719       ICmpInst *Old = cast<ICmpInst>(*UI++);
720       Value *Cmp =
721           B.CreateICmp(Old->getPredicate(), StrNCmp,
722                        ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
723       replaceAllUsesWith(Old, Cmp);
724     }
725     return CI;
726   }
727 
728   // See if either input string is a constant string.
729   StringRef SearchStr, ToFindStr;
730   bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
731   bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
732 
733   // fold strstr(x, "") -> x.
734   if (HasStr2 && ToFindStr.empty())
735     return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
736 
737   // If both strings are known, constant fold it.
738   if (HasStr1 && HasStr2) {
739     size_t Offset = SearchStr.find(ToFindStr);
740 
741     if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
742       return Constant::getNullValue(CI->getType());
743 
744     // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
745     Value *Result = castToCStr(CI->getArgOperand(0), B);
746     Result =
747         B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
748     return B.CreateBitCast(Result, CI->getType());
749   }
750 
751   // fold strstr(x, "y") -> strchr(x, 'y').
752   if (HasStr2 && ToFindStr.size() == 1) {
753     Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
754     return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
755   }
756   return nullptr;
757 }
758 
759 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
760   Value *SrcStr = CI->getArgOperand(0);
761   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
762   ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
763 
764   // memchr(x, y, 0) -> null
765   if (LenC && LenC->isZero())
766     return Constant::getNullValue(CI->getType());
767 
768   // From now on we need at least constant length and string.
769   StringRef Str;
770   if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
771     return nullptr;
772 
773   // Truncate the string to LenC. If Str is smaller than LenC we will still only
774   // scan the string, as reading past the end of it is undefined and we can just
775   // return null if we don't find the char.
776   Str = Str.substr(0, LenC->getZExtValue());
777 
778   // If the char is variable but the input str and length are not we can turn
779   // this memchr call into a simple bit field test. Of course this only works
780   // when the return value is only checked against null.
781   //
782   // It would be really nice to reuse switch lowering here but we can't change
783   // the CFG at this point.
784   //
785   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
786   // != 0
787   //   after bounds check.
788   if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
789     unsigned char Max =
790         *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
791                           reinterpret_cast<const unsigned char *>(Str.end()));
792 
793     // Make sure the bit field we're about to create fits in a register on the
794     // target.
795     // FIXME: On a 64 bit architecture this prevents us from using the
796     // interesting range of alpha ascii chars. We could do better by emitting
797     // two bitfields or shifting the range by 64 if no lower chars are used.
798     if (!DL.fitsInLegalInteger(Max + 1))
799       return nullptr;
800 
801     // For the bit field use a power-of-2 type with at least 8 bits to avoid
802     // creating unnecessary illegal types.
803     unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
804 
805     // Now build the bit field.
806     APInt Bitfield(Width, 0);
807     for (char C : Str)
808       Bitfield.setBit((unsigned char)C);
809     Value *BitfieldC = B.getInt(Bitfield);
810 
811     // Adjust width of "C" to the bitfield width, then mask off the high bits.
812     Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
813     C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
814 
815     // First check that the bit field access is within bounds.
816     Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
817                                  "memchr.bounds");
818 
819     // Create code that checks if the given bit is set in the field.
820     Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
821     Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
822 
823     // Finally merge both checks and cast to pointer type. The inttoptr
824     // implicitly zexts the i1 to intptr type.
825     return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
826   }
827 
828   // Check if all arguments are constants.  If so, we can constant fold.
829   if (!CharC)
830     return nullptr;
831 
832   // Compute the offset.
833   size_t I = Str.find(CharC->getSExtValue() & 0xFF);
834   if (I == StringRef::npos) // Didn't find the char.  memchr returns null.
835     return Constant::getNullValue(CI->getType());
836 
837   // memchr(s+n,c,l) -> gep(s+n+i,c)
838   return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
839 }
840 
841 static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
842                                          uint64_t Len, IRBuilder<> &B,
843                                          const DataLayout &DL) {
844   if (Len == 0) // memcmp(s1,s2,0) -> 0
845     return Constant::getNullValue(CI->getType());
846 
847   // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
848   if (Len == 1) {
849     Value *LHSV =
850         B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
851                      CI->getType(), "lhsv");
852     Value *RHSV =
853         B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
854                      CI->getType(), "rhsv");
855     return B.CreateSub(LHSV, RHSV, "chardiff");
856   }
857 
858   // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
859   // TODO: The case where both inputs are constants does not need to be limited
860   // to legal integers or equality comparison. See block below this.
861   if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
862     IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
863     unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
864 
865     // First, see if we can fold either argument to a constant.
866     Value *LHSV = nullptr;
867     if (auto *LHSC = dyn_cast<Constant>(LHS)) {
868       LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
869       LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
870     }
871     Value *RHSV = nullptr;
872     if (auto *RHSC = dyn_cast<Constant>(RHS)) {
873       RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
874       RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
875     }
876 
877     // Don't generate unaligned loads. If either source is constant data,
878     // alignment doesn't matter for that source because there is no load.
879     if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
880         (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
881       if (!LHSV) {
882         Type *LHSPtrTy =
883             IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
884         LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
885       }
886       if (!RHSV) {
887         Type *RHSPtrTy =
888             IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
889         RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
890       }
891       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
892     }
893   }
894 
895   // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
896   // TODO: This is limited to i8 arrays.
897   StringRef LHSStr, RHSStr;
898   if (getConstantStringInfo(LHS, LHSStr) &&
899       getConstantStringInfo(RHS, RHSStr)) {
900     // Make sure we're not reading out-of-bounds memory.
901     if (Len > LHSStr.size() || Len > RHSStr.size())
902       return nullptr;
903     // Fold the memcmp and normalize the result.  This way we get consistent
904     // results across multiple platforms.
905     uint64_t Ret = 0;
906     int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
907     if (Cmp < 0)
908       Ret = -1;
909     else if (Cmp > 0)
910       Ret = 1;
911     return ConstantInt::get(CI->getType(), Ret);
912   }
913   return nullptr;
914 }
915 
916 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
917   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
918   Value *Size = CI->getArgOperand(2);
919 
920   if (LHS == RHS) // memcmp(s,s,x) -> 0
921     return Constant::getNullValue(CI->getType());
922 
923   // Handle constant lengths.
924   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
925     if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
926                                                 LenC->getZExtValue(), B, DL))
927       return Res;
928 
929   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
930   // `bcmp` can be more efficient than memcmp because it only has to know that
931   // there is a difference, not where it is.
932   if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
933     return emitBCmp(LHS, RHS, Size, B, DL, TLI);
934   }
935 
936   return nullptr;
937 }
938 
939 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
940   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
941   B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
942                  CI->getArgOperand(2));
943   return CI->getArgOperand(0);
944 }
945 
946 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
947   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
948   B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
949                   CI->getArgOperand(2));
950   return CI->getArgOperand(0);
951 }
952 
953 /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
954 Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
955   // This has to be a memset of zeros (bzero).
956   auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
957   if (!FillValue || FillValue->getZExtValue() != 0)
958     return nullptr;
959 
960   // TODO: We should handle the case where the malloc has more than one use.
961   // This is necessary to optimize common patterns such as when the result of
962   // the malloc is checked against null or when a memset intrinsic is used in
963   // place of a memset library call.
964   auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
965   if (!Malloc || !Malloc->hasOneUse())
966     return nullptr;
967 
968   // Is the inner call really malloc()?
969   Function *InnerCallee = Malloc->getCalledFunction();
970   if (!InnerCallee)
971     return nullptr;
972 
973   LibFunc Func;
974   if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
975       Func != LibFunc_malloc)
976     return nullptr;
977 
978   // The memset must cover the same number of bytes that are malloc'd.
979   if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
980     return nullptr;
981 
982   // Replace the malloc with a calloc. We need the data layout to know what the
983   // actual size of a 'size_t' parameter is.
984   B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
985   const DataLayout &DL = Malloc->getModule()->getDataLayout();
986   IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
987   Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
988                              Malloc->getArgOperand(0), Malloc->getAttributes(),
989                              B, *TLI);
990   if (!Calloc)
991     return nullptr;
992 
993   Malloc->replaceAllUsesWith(Calloc);
994   eraseFromParent(Malloc);
995 
996   return Calloc;
997 }
998 
999 Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
1000   if (auto *Calloc = foldMallocMemset(CI, B))
1001     return Calloc;
1002 
1003   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1004   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
1005   B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
1006   return CI->getArgOperand(0);
1007 }
1008 
1009 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
1010   if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
1011     return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
1012 
1013   return nullptr;
1014 }
1015 
1016 //===----------------------------------------------------------------------===//
1017 // Math Library Optimizations
1018 //===----------------------------------------------------------------------===//
1019 
1020 // Replace a libcall \p CI with a call to intrinsic \p IID
1021 static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
1022   // Propagate fast-math flags from the existing call to the new call.
1023   IRBuilder<>::FastMathFlagGuard Guard(B);
1024   B.setFastMathFlags(CI->getFastMathFlags());
1025 
1026   Module *M = CI->getModule();
1027   Value *V = CI->getArgOperand(0);
1028   Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
1029   CallInst *NewCall = B.CreateCall(F, V);
1030   NewCall->takeName(CI);
1031   return NewCall;
1032 }
1033 
1034 /// Return a variant of Val with float type.
1035 /// Currently this works in two cases: If Val is an FPExtension of a float
1036 /// value to something bigger, simply return the operand.
1037 /// If Val is a ConstantFP but can be converted to a float ConstantFP without
1038 /// loss of precision do so.
1039 static Value *valueHasFloatPrecision(Value *Val) {
1040   if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
1041     Value *Op = Cast->getOperand(0);
1042     if (Op->getType()->isFloatTy())
1043       return Op;
1044   }
1045   if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
1046     APFloat F = Const->getValueAPF();
1047     bool losesInfo;
1048     (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1049                     &losesInfo);
1050     if (!losesInfo)
1051       return ConstantFP::get(Const->getContext(), F);
1052   }
1053   return nullptr;
1054 }
1055 
1056 /// Shrink double -> float functions.
1057 static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
1058                                bool isBinary, bool isPrecise = false) {
1059   if (!CI->getType()->isDoubleTy())
1060     return nullptr;
1061 
1062   // If not all the uses of the function are converted to float, then bail out.
1063   // This matters if the precision of the result is more important than the
1064   // precision of the arguments.
1065   if (isPrecise)
1066     for (User *U : CI->users()) {
1067       FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
1068       if (!Cast || !Cast->getType()->isFloatTy())
1069         return nullptr;
1070     }
1071 
1072   // If this is something like 'g((double) float)', convert to 'gf(float)'.
1073   Value *V[2];
1074   V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
1075   V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
1076   if (!V[0] || (isBinary && !V[1]))
1077     return nullptr;
1078 
1079   // If call isn't an intrinsic, check that it isn't within a function with the
1080   // same name as the float version of this call, otherwise the result is an
1081   // infinite loop.  For example, from MinGW-w64:
1082   //
1083   // float expf(float val) { return (float) exp((double) val); }
1084   Function *CalleeFn = CI->getCalledFunction();
1085   StringRef CalleeNm = CalleeFn->getName();
1086   AttributeList CalleeAt = CalleeFn->getAttributes();
1087   if (CalleeFn && !CalleeFn->isIntrinsic()) {
1088     const Function *Fn = CI->getFunction();
1089     StringRef FnName = Fn->getName();
1090     if (FnName.back() == 'f' &&
1091         FnName.size() == (CalleeNm.size() + 1) &&
1092         FnName.startswith(CalleeNm))
1093       return nullptr;
1094   }
1095 
1096   // Propagate the math semantics from the current function to the new function.
1097   IRBuilder<>::FastMathFlagGuard Guard(B);
1098   B.setFastMathFlags(CI->getFastMathFlags());
1099 
1100   // g((double) float) -> (double) gf(float)
1101   Value *R;
1102   if (CalleeFn->isIntrinsic()) {
1103     Module *M = CI->getModule();
1104     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
1105     Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
1106     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
1107   }
1108   else
1109     R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
1110                  : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
1111 
1112   return B.CreateFPExt(R, B.getDoubleTy());
1113 }
1114 
1115 /// Shrink double -> float for unary functions.
1116 static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
1117                                     bool isPrecise = false) {
1118   return optimizeDoubleFP(CI, B, false, isPrecise);
1119 }
1120 
1121 /// Shrink double -> float for binary functions.
1122 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
1123                                      bool isPrecise = false) {
1124   return optimizeDoubleFP(CI, B, true, isPrecise);
1125 }
1126 
1127 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
1128 Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
1129   if (!CI->isFast())
1130     return nullptr;
1131 
1132   // Propagate fast-math flags from the existing call to new instructions.
1133   IRBuilder<>::FastMathFlagGuard Guard(B);
1134   B.setFastMathFlags(CI->getFastMathFlags());
1135 
1136   Value *Real, *Imag;
1137   if (CI->getNumArgOperands() == 1) {
1138     Value *Op = CI->getArgOperand(0);
1139     assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
1140     Real = B.CreateExtractValue(Op, 0, "real");
1141     Imag = B.CreateExtractValue(Op, 1, "imag");
1142   } else {
1143     assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
1144     Real = CI->getArgOperand(0);
1145     Imag = CI->getArgOperand(1);
1146   }
1147 
1148   Value *RealReal = B.CreateFMul(Real, Real);
1149   Value *ImagImag = B.CreateFMul(Imag, Imag);
1150 
1151   Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
1152                                               CI->getType());
1153   return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
1154 }
1155 
1156 static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
1157                                       IRBuilder<> &B) {
1158   if (!isa<FPMathOperator>(Call))
1159     return nullptr;
1160 
1161   IRBuilder<>::FastMathFlagGuard Guard(B);
1162   B.setFastMathFlags(Call->getFastMathFlags());
1163 
1164   // TODO: Can this be shared to also handle LLVM intrinsics?
1165   Value *X;
1166   switch (Func) {
1167   case LibFunc_sin:
1168   case LibFunc_sinf:
1169   case LibFunc_sinl:
1170   case LibFunc_tan:
1171   case LibFunc_tanf:
1172   case LibFunc_tanl:
1173     // sin(-X) --> -sin(X)
1174     // tan(-X) --> -tan(X)
1175     if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
1176       return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
1177     break;
1178   case LibFunc_cos:
1179   case LibFunc_cosf:
1180   case LibFunc_cosl:
1181     // cos(-X) --> cos(X)
1182     if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
1183       return B.CreateCall(Call->getCalledFunction(), X, "cos");
1184     break;
1185   default:
1186     break;
1187   }
1188   return nullptr;
1189 }
1190 
1191 static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
1192   // Multiplications calculated using Addition Chains.
1193   // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
1194 
1195   assert(Exp != 0 && "Incorrect exponent 0 not handled");
1196 
1197   if (InnerChain[Exp])
1198     return InnerChain[Exp];
1199 
1200   static const unsigned AddChain[33][2] = {
1201       {0, 0}, // Unused.
1202       {0, 0}, // Unused (base case = pow1).
1203       {1, 1}, // Unused (pre-computed).
1204       {1, 2},  {2, 2},   {2, 3},  {3, 3},   {2, 5},  {4, 4},
1205       {1, 8},  {5, 5},   {1, 10}, {6, 6},   {4, 9},  {7, 7},
1206       {3, 12}, {8, 8},   {8, 9},  {2, 16},  {1, 18}, {10, 10},
1207       {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
1208       {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
1209   };
1210 
1211   InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
1212                                  getPow(InnerChain, AddChain[Exp][1], B));
1213   return InnerChain[Exp];
1214 }
1215 
1216 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
1217 /// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
1218 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
1219   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
1220   AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
1221   Module *Mod = Pow->getModule();
1222   Type *Ty = Pow->getType();
1223   bool Ignored;
1224 
1225   // Evaluate special cases related to a nested function as the base.
1226 
1227   // pow(exp(x), y) -> exp(x * y)
1228   // pow(exp2(x), y) -> exp2(x * y)
1229   // If exp{,2}() is used only once, it is better to fold two transcendental
1230   // math functions into one.  If used again, exp{,2}() would still have to be
1231   // called with the original argument, then keep both original transcendental
1232   // functions.  However, this transformation is only safe with fully relaxed
1233   // math semantics, since, besides rounding differences, it changes overflow
1234   // and underflow behavior quite dramatically.  For example:
1235   //   pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
1236   // Whereas:
1237   //   exp(1000 * 0.001) = exp(1)
1238   // TODO: Loosen the requirement for fully relaxed math semantics.
1239   // TODO: Handle exp10() when more targets have it available.
1240   CallInst *BaseFn = dyn_cast<CallInst>(Base);
1241   if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
1242     LibFunc LibFn;
1243 
1244     Function *CalleeFn = BaseFn->getCalledFunction();
1245     if (CalleeFn &&
1246         TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
1247       StringRef ExpName;
1248       Intrinsic::ID ID;
1249       Value *ExpFn;
1250       LibFunc LibFnFloat;
1251       LibFunc LibFnDouble;
1252       LibFunc LibFnLongDouble;
1253 
1254       switch (LibFn) {
1255       default:
1256         return nullptr;
1257       case LibFunc_expf:  case LibFunc_exp:  case LibFunc_expl:
1258         ExpName = TLI->getName(LibFunc_exp);
1259         ID = Intrinsic::exp;
1260         LibFnFloat = LibFunc_expf;
1261         LibFnDouble = LibFunc_exp;
1262         LibFnLongDouble = LibFunc_expl;
1263         break;
1264       case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l:
1265         ExpName = TLI->getName(LibFunc_exp2);
1266         ID = Intrinsic::exp2;
1267         LibFnFloat = LibFunc_exp2f;
1268         LibFnDouble = LibFunc_exp2;
1269         LibFnLongDouble = LibFunc_exp2l;
1270         break;
1271       }
1272 
1273       // Create new exp{,2}() with the product as its argument.
1274       Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
1275       ExpFn = BaseFn->doesNotAccessMemory()
1276               ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
1277                              FMul, ExpName)
1278               : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
1279                                      LibFnLongDouble, B,
1280                                      BaseFn->getAttributes());
1281 
1282       // Since the new exp{,2}() is different from the original one, dead code
1283       // elimination cannot be trusted to remove it, since it may have side
1284       // effects (e.g., errno).  When the only consumer for the original
1285       // exp{,2}() is pow(), then it has to be explicitly erased.
1286       BaseFn->replaceAllUsesWith(ExpFn);
1287       eraseFromParent(BaseFn);
1288 
1289       return ExpFn;
1290     }
1291   }
1292 
1293   // Evaluate special cases related to a constant base.
1294 
1295   const APFloat *BaseF;
1296   if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
1297     return nullptr;
1298 
1299   // pow(2.0 ** n, x) -> exp2(n * x)
1300   if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
1301     APFloat BaseR = APFloat(1.0);
1302     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
1303     BaseR = BaseR / *BaseF;
1304     bool IsInteger    = BaseF->isInteger(),
1305          IsReciprocal = BaseR.isInteger();
1306     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
1307     APSInt NI(64, false);
1308     if ((IsInteger || IsReciprocal) &&
1309         !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
1310         NI > 1 && NI.isPowerOf2()) {
1311       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
1312       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
1313       if (Pow->doesNotAccessMemory())
1314         return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
1315                             FMul, "exp2");
1316       else
1317         return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
1318                                     LibFunc_exp2l, B, Attrs);
1319     }
1320   }
1321 
1322   // pow(10.0, x) -> exp10(x)
1323   // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
1324   if (match(Base, m_SpecificFP(10.0)) &&
1325       hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
1326     return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
1327                                 LibFunc_exp10l, B, Attrs);
1328 
1329   return nullptr;
1330 }
1331 
1332 static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
1333                           Module *M, IRBuilder<> &B,
1334                           const TargetLibraryInfo *TLI) {
1335   // If errno is never set, then use the intrinsic for sqrt().
1336   if (NoErrno) {
1337     Function *SqrtFn =
1338         Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
1339     return B.CreateCall(SqrtFn, V, "sqrt");
1340   }
1341 
1342   // Otherwise, use the libcall for sqrt().
1343   if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
1344                       LibFunc_sqrtl))
1345     // TODO: We also should check that the target can in fact lower the sqrt()
1346     // libcall. We currently have no way to ask this question, so we ask if
1347     // the target has a sqrt() libcall, which is not exactly the same.
1348     return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
1349                                 LibFunc_sqrtl, B, Attrs);
1350 
1351   return nullptr;
1352 }
1353 
1354 /// Use square root in place of pow(x, +/-0.5).
1355 Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
1356   Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
1357   AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
1358   Module *Mod = Pow->getModule();
1359   Type *Ty = Pow->getType();
1360 
1361   const APFloat *ExpoF;
1362   if (!match(Expo, m_APFloat(ExpoF)) ||
1363       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
1364     return nullptr;
1365 
1366   Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
1367   if (!Sqrt)
1368     return nullptr;
1369 
1370   // Handle signed zero base by expanding to fabs(sqrt(x)).
1371   if (!Pow->hasNoSignedZeros()) {
1372     Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
1373     Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
1374   }
1375 
1376   // Handle non finite base by expanding to
1377   // (x == -infinity ? +infinity : sqrt(x)).
1378   if (!Pow->hasNoInfs()) {
1379     Value *PosInf = ConstantFP::getInfinity(Ty),
1380           *NegInf = ConstantFP::getInfinity(Ty, true);
1381     Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
1382     Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
1383   }
1384 
1385   // If the exponent is negative, then get the reciprocal.
1386   if (ExpoF->isNegative())
1387     Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
1388 
1389   return Sqrt;
1390 }
1391 
1392 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
1393   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
1394   Function *Callee = Pow->getCalledFunction();
1395   StringRef Name = Callee->getName();
1396   Type *Ty = Pow->getType();
1397   Value *Shrunk = nullptr;
1398   bool Ignored;
1399 
1400   // Bail out if simplifying libcalls to pow() is disabled.
1401   if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
1402     return nullptr;
1403 
1404   // Propagate the math semantics from the call to any created instructions.
1405   IRBuilder<>::FastMathFlagGuard Guard(B);
1406   B.setFastMathFlags(Pow->getFastMathFlags());
1407 
1408   // Shrink pow() to powf() if the arguments are single precision,
1409   // unless the result is expected to be double precision.
1410   if (UnsafeFPShrink &&
1411       Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
1412     Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
1413 
1414   // Evaluate special cases related to the base.
1415 
1416   // pow(1.0, x) -> 1.0
1417   if (match(Base, m_FPOne()))
1418     return Base;
1419 
1420   if (Value *Exp = replacePowWithExp(Pow, B))
1421     return Exp;
1422 
1423   // Evaluate special cases related to the exponent.
1424 
1425   // pow(x, -1.0) -> 1.0 / x
1426   if (match(Expo, m_SpecificFP(-1.0)))
1427     return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
1428 
1429   // pow(x, 0.0) -> 1.0
1430   if (match(Expo, m_SpecificFP(0.0)))
1431       return ConstantFP::get(Ty, 1.0);
1432 
1433   // pow(x, 1.0) -> x
1434   if (match(Expo, m_FPOne()))
1435     return Base;
1436 
1437   // pow(x, 2.0) -> x * x
1438   if (match(Expo, m_SpecificFP(2.0)))
1439     return B.CreateFMul(Base, Base, "square");
1440 
1441   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
1442     return Sqrt;
1443 
1444   // pow(x, n) -> x * x * x * ...
1445   const APFloat *ExpoF;
1446   if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
1447     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
1448     // If the exponent is an integer+0.5 we generate a call to sqrt and an
1449     // additional fmul.
1450     // TODO: This whole transformation should be backend specific (e.g. some
1451     //       backends might prefer libcalls or the limit for the exponent might
1452     //       be different) and it should also consider optimizing for size.
1453     APFloat LimF(ExpoF->getSemantics(), 33.0),
1454             ExpoA(abs(*ExpoF));
1455     if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
1456       // This transformation applies to integer or integer+0.5 exponents only.
1457       // For integer+0.5, we create a sqrt(Base) call.
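      // For example, pow(x, 3.5) is expanded to (x * x * x) * sqrt(x).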
1458       Value *Sqrt = nullptr;
1459       if (!ExpoA.isInteger()) {
1460         APFloat Expo2 = ExpoA;
1461         // To check if ExpoA is an integer + 0.5, we add it to itself. If there
1462         // is no floating point exception and the result is an integer, then
1463         // ExpoA == integer + 0.5
1464         if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
1465           return nullptr;
1466 
1467         if (!Expo2.isInteger())
1468           return nullptr;
1469 
1470         Sqrt =
1471             getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
1472                         Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
1473       }
1474 
1475       // We will memoize intermediate products of the Addition Chain.
1476       Value *InnerChain[33] = {nullptr};
1477       InnerChain[1] = Base;
1478       InnerChain[2] = B.CreateFMul(Base, Base, "square");
1479 
1480       // ExpoA may not be in IEEEdouble semantics (e.g. for powf), but
1481       // convertToDouble() below requires IEEEdouble, so convert ExpoA first.
1482       ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
1483       Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
1484 
1485       // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
1486       if (Sqrt)
1487         FMul = B.CreateFMul(FMul, Sqrt);
1488 
1489       // If the exponent is negative, then get the reciprocal.
1490       if (ExpoF->isNegative())
1491         FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
1492 
1493       return FMul;
1494     }
1495   }
1496 
1497   return Shrunk;
1498 }
1499 
1500 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
1501   Function *Callee = CI->getCalledFunction();
1502   Value *Ret = nullptr;
1503   StringRef Name = Callee->getName();
1504   if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
1505     Ret = optimizeUnaryDoubleFP(CI, B, true);
1506 
1507   Value *Op = CI->getArgOperand(0);
1508   // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
1509   // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
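  // The i32 exponent passed to ldexp must equal the original integer: a signed
  // source of at most 32 bits sign-extends exactly into an i32, while an
  // unsigned source must be narrower than 32 bits so that zero-extension stays
  // within the non-negative i32 range.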
1510   LibFunc LdExp = LibFunc_ldexpl;
1511   if (Op->getType()->isFloatTy())
1512     LdExp = LibFunc_ldexpf;
1513   else if (Op->getType()->isDoubleTy())
1514     LdExp = LibFunc_ldexp;
1515 
1516   if (TLI->has(LdExp)) {
1517     Value *LdExpArg = nullptr;
1518     if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
1519       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
1520         LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
1521     } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
1522       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
1523         LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
1524     }
1525 
1526     if (LdExpArg) {
1527       Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
1528       if (!Op->getType()->isFloatTy())
1529         One = ConstantExpr::getFPExtend(One, Op->getType());
1530 
1531       Module *M = CI->getModule();
1532       FunctionCallee NewCallee = M->getOrInsertFunction(
1533           TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
1534       CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
1535       if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
1536         CI->setCallingConv(F->getCallingConv());
1537 
1538       return CI;
1539     }
1540   }
1541   return Ret;
1542 }
1543 
1544 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
1545   Function *Callee = CI->getCalledFunction();
1546   // If we can shrink the call to a float function rather than a double
1547   // function, do that first.
1548   StringRef Name = Callee->getName();
1549   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
1550     if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
1551       return Ret;
1552 
1553   IRBuilder<>::FastMathFlagGuard Guard(B);
1554   FastMathFlags FMF;
1555   if (CI->isFast()) {
1556     // If the call is 'fast', then anything we create here will also be 'fast'.
1557     FMF.setFast();
1558   } else {
1559     // At a minimum, no-nans-fp-math must be true.
1560     if (!CI->hasNoNaNs())
1561       return nullptr;
1562     // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
1563     // "Ideally, fmax would be sensitive to the sign of zero, for example
1564     // fmax(-0.0, +0.0) would return +0; however, implementation in software
1565     // might be impractical."
1566     FMF.setNoSignedZeros();
1567     FMF.setNoNaNs();
1568   }
1569   B.setFastMathFlags(FMF);
1570 
1571   // We have a relaxed floating-point environment. We can ignore NaN-handling
1572   // and transform to a compare and select. We do not have to consider errno or
1573   // exceptions, because fmin/fmax do not have those.
1574   Value *Op0 = CI->getArgOperand(0);
1575   Value *Op1 = CI->getArgOperand(1);
1576   Value *Cmp = Callee->getName().startswith("fmin") ?
1577     B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
1578   return B.CreateSelect(Cmp, Op0, Op1);
1579 }
1580 
1581 Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
1582   Function *Callee = CI->getCalledFunction();
1583   Value *Ret = nullptr;
1584   StringRef Name = Callee->getName();
1585   if (UnsafeFPShrink && hasFloatVersion(Name))
1586     Ret = optimizeUnaryDoubleFP(CI, B, true);
1587 
1588   if (!CI->isFast())
1589     return Ret;
1590   Value *Op1 = CI->getArgOperand(0);
1591   auto *OpC = dyn_cast<CallInst>(Op1);
1592 
1593   // The earlier call must also be 'fast' in order to do these transforms.
1594   if (!OpC || !OpC->isFast())
1595     return Ret;
1596 
1597   // log(pow(x,y)) -> y*log(x)
1598   // This is only applicable to log, log2, log10.
1599   if (Name != "log" && Name != "log2" && Name != "log10")
1600     return Ret;
1601 
1602   IRBuilder<>::FastMathFlagGuard Guard(B);
1603   FastMathFlags FMF;
1604   FMF.setFast();
1605   B.setFastMathFlags(FMF);
1606 
1607   LibFunc Func;
1608   Function *F = OpC->getCalledFunction();
1609   if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
1610       Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
1611     return B.CreateFMul(OpC->getArgOperand(1),
1612       emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
1613                            Callee->getAttributes()), "mul");
1614 
1615   // log(exp2(y)) -> y*log(2)
1616   if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
1617       TLI->has(Func) && Func == LibFunc_exp2)
1618     return B.CreateFMul(
1619         OpC->getArgOperand(0),
1620         emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
1621                              Callee->getName(), B, Callee->getAttributes()),
1622         "logmul");
1623   return Ret;
1624 }
1625 
1626 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
1627   Function *Callee = CI->getCalledFunction();
1628   Value *Ret = nullptr;
1629   // TODO: Once we have a way (other than checking for the existence of the
1630   // libcall) to tell whether our target can lower @llvm.sqrt, relax the
1631   // condition below.
1632   if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
1633                                   Callee->getIntrinsicID() == Intrinsic::sqrt))
1634     Ret = optimizeUnaryDoubleFP(CI, B, true);
1635 
1636   if (!CI->isFast())
1637     return Ret;
1638 
1639   Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
1640   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
1641     return Ret;
1642 
1643   // We're looking for a repeated factor in a multiplication tree,
1644   // so we can do this fold: sqrt(x * x) -> fabs(x);
1645   // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
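  // The fabs is required because the repeated factor may be negative,
  // e.g. sqrt((-2.0) * (-2.0)) is 2.0, not -2.0.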
1646   Value *Op0 = I->getOperand(0);
1647   Value *Op1 = I->getOperand(1);
1648   Value *RepeatOp = nullptr;
1649   Value *OtherOp = nullptr;
1650   if (Op0 == Op1) {
1651     // Simple match: the operands of the multiply are identical.
1652     RepeatOp = Op0;
1653   } else {
1654     // Look for a more complicated pattern: one of the operands is itself
1655     // a multiply, so search for a common factor in that multiply.
1656     // Note: We don't bother looking any deeper than this first level or for
1657     // variations of this pattern because instcombine's visitFMUL and/or the
1658     // reassociation pass should give us this form.
1659     Value *OtherMul0, *OtherMul1;
1660     if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
1661       // Pattern: sqrt((x * y) * z)
1662       if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) {
1663         // Matched: sqrt((x * x) * z)
1664         RepeatOp = OtherMul0;
1665         OtherOp = Op1;
1666       }
1667     }
1668   }
1669   if (!RepeatOp)
1670     return Ret;
1671 
1672   // Fast math flags for any created instructions should match the sqrt
1673   // and multiply.
1674   IRBuilder<>::FastMathFlagGuard Guard(B);
1675   B.setFastMathFlags(I->getFastMathFlags());
1676 
1677   // If we found a repeated factor, hoist it out of the square root and
1678   // replace it with the fabs of that factor.
1679   Module *M = Callee->getParent();
1680   Type *ArgType = I->getType();
1681   Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
1682   Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
1683   if (OtherOp) {
1684     // If we found a non-repeated factor, we still need to get its square
1685     // root. We then multiply that by the value that was simplified out
1686     // of the square root calculation.
1687     Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
1688     Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
1689     return B.CreateFMul(FabsCall, SqrtCall);
1690   }
1691   return FabsCall;
1692 }
1693 
1694 // TODO: Generalize to handle any trig function and its inverse.
1695 Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
1696   Function *Callee = CI->getCalledFunction();
1697   Value *Ret = nullptr;
1698   StringRef Name = Callee->getName();
1699   if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
1700     Ret = optimizeUnaryDoubleFP(CI, B, true);
1701 
1702   Value *Op1 = CI->getArgOperand(0);
1703   auto *OpC = dyn_cast<CallInst>(Op1);
1704   if (!OpC)
1705     return Ret;
1706 
1707   // Both calls must be 'fast' in order to remove them.
1708   if (!CI->isFast() || !OpC->isFast())
1709     return Ret;
1710 
1711   // tan(atan(x)) -> x
1712   // tanf(atanf(x)) -> x
1713   // tanl(atanl(x)) -> x
1714   LibFunc Func;
1715   Function *F = OpC->getCalledFunction();
1716   if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
1717       ((Func == LibFunc_atan && Callee->getName() == "tan") ||
1718        (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
1719        (Func == LibFunc_atanl && Callee->getName() == "tanl")))
1720     Ret = OpC->getArgOperand(0);
1721   return Ret;
1722 }
1723 
1724 static bool isTrigLibCall(CallInst *CI) {
1725   // We can only hope to do anything useful if we can ignore things like errno
1726   // and floating-point exceptions.
1727   // We already checked the prototype.
1728   return CI->hasFnAttr(Attribute::NoUnwind) &&
1729          CI->hasFnAttr(Attribute::ReadNone);
1730 }
1731 
1732 static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
1733                              bool UseFloat, Value *&Sin, Value *&Cos,
1734                              Value *&SinCos) {
1735   Type *ArgTy = Arg->getType();
1736   Type *ResTy;
1737   StringRef Name;
1738 
1739   Triple T(OrigCallee->getParent()->getTargetTriple());
1740   if (UseFloat) {
1741     Name = "__sincospif_stret";
1742 
1743     assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
1744     // x86_64 can't use {float, float} since that would be returned in both
1745     // xmm0 and xmm1, which isn't what a real struct would do.
1746     ResTy = T.getArch() == Triple::x86_64
1747                 ? static_cast<Type *>(VectorType::get(ArgTy, 2))
1748                 : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
1749   } else {
1750     Name = "__sincospi_stret";
1751     ResTy = StructType::get(ArgTy, ArgTy);
1752   }
1753 
1754   Module *M = OrigCallee->getParent();
1755   FunctionCallee Callee =
1756       M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
1757 
1758   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1759     // If the argument is an instruction, it must dominate all uses so put our
1760     // sincos call there.
1761     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1762   } else {
1763     // Otherwise (e.g. for a constant) the beginning of the function is as
1764     // good a place as any.
1765     BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
1766     B.SetInsertPoint(&EntryBB, EntryBB.begin());
1767   }
1768 
1769   SinCos = B.CreateCall(Callee, Arg, "sincospi");
1770 
1771   if (SinCos->getType()->isStructTy()) {
1772     Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
1773     Cos = B.CreateExtractValue(SinCos, 1, "cospi");
1774   } else {
1775     Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
1776                                  "sinpi");
1777     Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
1778                                  "cospi");
1779   }
1780 }
1781 
1782 Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
1783   // Make sure the prototype is as expected, otherwise the rest of the
1784   // function is probably invalid and likely to abort.
1785   if (!isTrigLibCall(CI))
1786     return nullptr;
1787 
1788   Value *Arg = CI->getArgOperand(0);
1789   SmallVector<CallInst *, 1> SinCalls;
1790   SmallVector<CallInst *, 1> CosCalls;
1791   SmallVector<CallInst *, 1> SinCosCalls;
1792 
1793   bool IsFloat = Arg->getType()->isFloatTy();
1794 
1795   // Look for all compatible sinpi, cospi and sincospi calls with the same
1796   // argument. If there are enough (in some sense) we can make the
1797   // substitution.
1798   Function *F = CI->getFunction();
1799   for (User *U : Arg->users())
1800     classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
1801 
1802   // It's only worthwhile if both sinpi and cospi are actually used.
1803   if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
1804     return nullptr;
1805 
1806   Value *Sin, *Cos, *SinCos;
1807   insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
1808 
1809   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
1810                                  Value *Res) {
1811     for (CallInst *C : Calls)
1812       replaceAllUsesWith(C, Res);
1813   };
1814 
1815   replaceTrigInsts(SinCalls, Sin);
1816   replaceTrigInsts(CosCalls, Cos);
1817   replaceTrigInsts(SinCosCalls, SinCos);
1818 
1819   return nullptr;
1820 }
1821 
1822 void LibCallSimplifier::classifyArgUse(
1823     Value *Val, Function *F, bool IsFloat,
1824     SmallVectorImpl<CallInst *> &SinCalls,
1825     SmallVectorImpl<CallInst *> &CosCalls,
1826     SmallVectorImpl<CallInst *> &SinCosCalls) {
1827   CallInst *CI = dyn_cast<CallInst>(Val);
1828 
1829   if (!CI)
1830     return;
1831 
1832   // Don't consider calls in other functions.
1833   if (CI->getFunction() != F)
1834     return;
1835 
1836   Function *Callee = CI->getCalledFunction();
1837   LibFunc Func;
1838   if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
1839       !isTrigLibCall(CI))
1840     return;
1841 
1842   if (IsFloat) {
1843     if (Func == LibFunc_sinpif)
1844       SinCalls.push_back(CI);
1845     else if (Func == LibFunc_cospif)
1846       CosCalls.push_back(CI);
1847     else if (Func == LibFunc_sincospif_stret)
1848       SinCosCalls.push_back(CI);
1849   } else {
1850     if (Func == LibFunc_sinpi)
1851       SinCalls.push_back(CI);
1852     else if (Func == LibFunc_cospi)
1853       CosCalls.push_back(CI);
1854     else if (Func == LibFunc_sincospi_stret)
1855       SinCosCalls.push_back(CI);
1856   }
1857 }
1858 
1859 //===----------------------------------------------------------------------===//
1860 // Integer Library Call Optimizations
1861 //===----------------------------------------------------------------------===//
1862 
1863 Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
1864   // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
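  // For example, ffs(0) == 0 and ffs(0x10) == 5. Passing 'true' as the
  // is-zero-undef flag of cttz is fine because the x == 0 case is handled by
  // the select below.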
1865   Value *Op = CI->getArgOperand(0);
1866   Type *ArgType = Op->getType();
1867   Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
1868                                           Intrinsic::cttz, ArgType);
1869   Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
1870   V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
1871   V = B.CreateIntCast(V, B.getInt32Ty(), false);
1872 
1873   Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
1874   return B.CreateSelect(Cond, V, B.getInt32(0));
1875 }
1876 
1877 Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
1878   // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
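  // For example, with a 32-bit argument, fls(0) == 0 (ctlz(0, false) is
  // defined to return 32) and fls(0x10) == 5.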
1879   Value *Op = CI->getArgOperand(0);
1880   Type *ArgType = Op->getType();
1881   Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
1882                                           Intrinsic::ctlz, ArgType);
1883   Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
1884   V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
1885                   V);
1886   return B.CreateIntCast(V, CI->getType(), false);
1887 }
1888 
1889 Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
1890   // abs(x) -> x <s 0 ? -x : x
1891   // The negation has 'nsw' because abs of INT_MIN is undefined.
1892   Value *X = CI->getArgOperand(0);
1893   Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
1894   Value *NegX = B.CreateNSWNeg(X, "neg");
1895   return B.CreateSelect(IsNeg, NegX, X);
1896 }
1897 
1898 Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
1899   // isdigit(c) -> (c-'0') <u 10
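  // The single unsigned comparison also rejects c < '0': the subtraction then
  // wraps around to a large unsigned value, which is never below 10.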
1900   Value *Op = CI->getArgOperand(0);
1901   Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
1902   Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
1903   return B.CreateZExt(Op, CI->getType());
1904 }
1905 
1906 Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
1907   // isascii(c) -> c <u 128
1908   Value *Op = CI->getArgOperand(0);
1909   Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
1910   return B.CreateZExt(Op, CI->getType());
1911 }
1912 
1913 Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
1914   // toascii(c) -> c & 0x7f
1915   return B.CreateAnd(CI->getArgOperand(0),
1916                      ConstantInt::get(CI->getType(), 0x7F));
1917 }
1918 
1919 Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) {
1920   StringRef Str;
1921   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
1922     return nullptr;
1923 
1924   return convertStrToNumber(CI, Str, 10);
1925 }
1926 
1927 Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) {
1928   StringRef Str;
1929   if (!getConstantStringInfo(CI->getArgOperand(0), Str))
1930     return nullptr;
1931 
1932   if (!isa<ConstantPointerNull>(CI->getArgOperand(1)))
1933     return nullptr;
1934 
1935   if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
1936     return convertStrToNumber(CI, Str, CInt->getSExtValue());
1937   }
1938 
1939   return nullptr;
1940 }
1941 
1942 //===----------------------------------------------------------------------===//
1943 // Formatting and IO Library Call Optimizations
1944 //===----------------------------------------------------------------------===//
1945 
1946 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
1947 
1948 Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
1949                                                  int StreamArg) {
1950   Function *Callee = CI->getCalledFunction();
1951   // Error reporting calls should be cold, mark them as such.
1952   // This applies even to non-builtin calls: it is only a hint and applies to
1953   // functions that the frontend might not understand as builtins.
1954 
1955   // This heuristic was suggested in:
1956   // Improving Static Branch Prediction in a Compiler
1957   // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
1958   // Proceedings of PACT'98, Oct. 1998, IEEE
1959   if (!CI->hasFnAttr(Attribute::Cold) &&
1960       isReportingError(Callee, CI, StreamArg)) {
1961     CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
1962   }
1963 
1964   return nullptr;
1965 }
1966 
1967 static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
1968   if (!Callee || !Callee->isDeclaration())
1969     return false;
1970 
1971   if (StreamArg < 0)
1972     return true;
1973 
1974   // These functions might be considered cold, but only if their stream
1975   // argument is stderr.
1976 
1977   if (StreamArg >= (int)CI->getNumArgOperands())
1978     return false;
1979   LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
1980   if (!LI)
1981     return false;
1982   GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
1983   if (!GV || !GV->isDeclaration())
1984     return false;
1985   return GV->getName() == "stderr";
1986 }
1987 
1988 Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
1989   // Check for a fixed format string.
1990   StringRef FormatStr;
1991   if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
1992     return nullptr;
1993 
1994   // Empty format string -> noop.
1995   if (FormatStr.empty()) // Tolerate printf's declared void.
1996     return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
1997 
1998   // Do not do any of the following transformations if the printf return value
1999   // is used; in general, the printf return value is not compatible with either
2000   // putchar() or puts().
2001   if (!CI->use_empty())
2002     return nullptr;
2003 
2004   // printf("x") -> putchar('x'), even for "%" and "%%".
2005   if (FormatStr.size() == 1 || FormatStr == "%%")
2006     return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
2007 
2008   // printf("%s", "a") --> putchar('a')
2009   if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
2010     StringRef ChrStr;
2011     if (!getConstantStringInfo(CI->getOperand(1), ChrStr))
2012       return nullptr;
2013     if (ChrStr.size() != 1)
2014       return nullptr;
2015     return emitPutChar(B.getInt32(ChrStr[0]), B, TLI);
2016   }
2017 
2018   // printf("foo\n") --> puts("foo")
2019   if (FormatStr[FormatStr.size() - 1] == '\n' &&
2020       FormatStr.find('%') == StringRef::npos) { // No format characters.
2021     // Create a string literal with no \n on it.  We expect the constant merge
2022     // pass to be run after this pass, to merge duplicate strings.
2023     FormatStr = FormatStr.drop_back();
2024     Value *GV = B.CreateGlobalString(FormatStr, "str");
2025     return emitPutS(GV, B, TLI);
2026   }
2027 
2028   // Optimize specific format strings.
2029   // printf("%c", chr) --> putchar(chr)
2030   if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
2031       CI->getArgOperand(1)->getType()->isIntegerTy())
2032     return emitPutChar(CI->getArgOperand(1), B, TLI);
2033 
2034   // printf("%s\n", str) --> puts(str)
2035   if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
2036       CI->getArgOperand(1)->getType()->isPointerTy())
2037     return emitPutS(CI->getArgOperand(1), B, TLI);
2038   return nullptr;
2039 }
2040 
2041 Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
2042 
2043   Function *Callee = CI->getCalledFunction();
2044   FunctionType *FT = Callee->getFunctionType();
2045   if (Value *V = optimizePrintFString(CI, B)) {
2046     return V;
2047   }
2048 
2049   // printf(format, ...) -> iprintf(format, ...) if no floating point
2050   // arguments.
2051   if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
2052     Module *M = B.GetInsertBlock()->getParent()->getParent();
2053     FunctionCallee IPrintFFn =
2054         M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
2055     CallInst *New = cast<CallInst>(CI->clone());
2056     New->setCalledFunction(IPrintFFn);
2057     B.Insert(New);
2058     return New;
2059   }
2060 
2061   // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
2062   // arguments.
2063   if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
2064     Module *M = B.GetInsertBlock()->getParent()->getParent();
2065     auto SmallPrintFFn =
2066         M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
2067                                FT, Callee->getAttributes());
2068     CallInst *New = cast<CallInst>(CI->clone());
2069     New->setCalledFunction(SmallPrintFFn);
2070     B.Insert(New);
2071     return New;
2072   }
2073 
2074   return nullptr;
2075 }
2076 
2077 Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
2078   // Check for a fixed format string.
2079   StringRef FormatStr;
2080   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
2081     return nullptr;
2082 
2083   // If we just have a format string (nothing else crazy) transform it.
2084   if (CI->getNumArgOperands() == 2) {
2085     // Make sure there's no % in the constant array.  We could try to handle
2086     // %% -> % in the future if we cared.
2087     if (FormatStr.find('%') != StringRef::npos)
2088       return nullptr; // we found a format specifier, bail out.
2089 
2090     // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
2091     B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
2092                    ConstantInt::get(DL.getIntPtrType(CI->getContext()),
2093                                     FormatStr.size() + 1)); // Copy the null byte.
2094     return ConstantInt::get(CI->getType(), FormatStr.size());
2095   }
2096 
2097   // The remaining optimizations require the format string to be "%s" or "%c"
2098   // and have an extra operand.
2099   if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
2100       CI->getNumArgOperands() < 3)
2101     return nullptr;
2102 
2103   // Decode the second character of the format string.
2104   if (FormatStr[1] == 'c') {
2105     // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
2106     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
2107       return nullptr;
2108     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
2109     Value *Ptr = castToCStr(CI->getArgOperand(0), B);
2110     B.CreateStore(V, Ptr);
2111     Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
2112     B.CreateStore(B.getInt8(0), Ptr);
2113 
2114     return ConstantInt::get(CI->getType(), 1);
2115   }
2116 
2117   if (FormatStr[1] == 's') {
2118     // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
2119     // strlen(str)+1)
2120     if (!CI->getArgOperand(2)->getType()->isPointerTy())
2121       return nullptr;
2122 
2123     Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
2124     if (!Len)
2125       return nullptr;
2126     Value *IncLen =
2127         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
2128     B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
2129 
2130     // The sprintf result is the unincremented number of bytes in the string.
2131     return B.CreateIntCast(Len, CI->getType(), false);
2132   }
2133   return nullptr;
2134 }
2135 
2136 Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
2137   Function *Callee = CI->getCalledFunction();
2138   FunctionType *FT = Callee->getFunctionType();
2139   if (Value *V = optimizeSPrintFString(CI, B)) {
2140     return V;
2141   }
2142 
2143   // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
2144   // point arguments.
2145   if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
2146     Module *M = B.GetInsertBlock()->getParent()->getParent();
2147     FunctionCallee SIPrintFFn =
2148         M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
2149     CallInst *New = cast<CallInst>(CI->clone());
2150     New->setCalledFunction(SIPrintFFn);
2151     B.Insert(New);
2152     return New;
2153   }
2154 
2155   // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
2156   // floating point arguments.
2157   if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
2158     Module *M = B.GetInsertBlock()->getParent()->getParent();
2159     auto SmallSPrintFFn =
2160         M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
2161                                FT, Callee->getAttributes());
2162     CallInst *New = cast<CallInst>(CI->clone());
2163     New->setCalledFunction(SmallSPrintFFn);
2164     B.Insert(New);
2165     return New;
2166   }
2167 
2168   return nullptr;
2169 }
2170 
2171 Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
2172   // Check for a fixed format string.
2173   StringRef FormatStr;
2174   if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
2175     return nullptr;
2176 
2177   // Check for a constant size argument.
2178   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
2179   if (!Size)
2180     return nullptr;
2181 
2182   uint64_t N = Size->getZExtValue();
2183 
2184   // If we just have a format string (nothing else crazy) transform it.
2185   if (CI->getNumArgOperands() == 3) {
2186     // Make sure there's no % in the constant array.  We could try to handle
2187     // %% -> % in the future if we cared.
2188     if (FormatStr.find('%') != StringRef::npos)
2189       return nullptr; // we found a format specifier, bail out.
2190 
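    // Per C99, snprintf returns the number of characters that would have been
    // written (not counting the nul), even when the size is 0; a non-zero size
    // that is still too small would truncate, which the memcpy below cannot
    // model, so bail out in that case.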
2191     if (N == 0)
2192       return ConstantInt::get(CI->getType(), FormatStr.size());
2193     else if (N < FormatStr.size() + 1)
2194       return nullptr;
2195 
2196     // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
2197     // strlen(fmt)+1)
2198     B.CreateMemCpy(
2199         CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
2200         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
2201                          FormatStr.size() + 1)); // Copy the null byte.
2202     return ConstantInt::get(CI->getType(), FormatStr.size());
2203   }
2204 
2205   // The remaining optimizations require the format string to be "%s" or "%c"
2206   // and have an extra operand.
2207   if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
2208       CI->getNumArgOperands() == 4) {
2209 
2210     // Decode the second character of the format string.
2211     if (FormatStr[1] == 'c') {
2212       if (N == 0)
2213         return ConstantInt::get(CI->getType(), 1);
2214       else if (N == 1)
2215         return nullptr;
2216 
2217       // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
2218       if (!CI->getArgOperand(3)->getType()->isIntegerTy())
2219         return nullptr;
2220       Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
2221       Value *Ptr = castToCStr(CI->getArgOperand(0), B);
2222       B.CreateStore(V, Ptr);
2223       Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
2224       B.CreateStore(B.getInt8(0), Ptr);
2225 
2226       return ConstantInt::get(CI->getType(), 1);
2227     }
2228 
2229     if (FormatStr[1] == 's') {
2230       // snprintf(dest, size, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, len+1)
2231       StringRef Str;
2232       if (!getConstantStringInfo(CI->getArgOperand(3), Str))
2233         return nullptr;
2234 
2235       if (N == 0)
2236         return ConstantInt::get(CI->getType(), Str.size());
2237       else if (N < Str.size() + 1)
2238         return nullptr;
2239 
2240       B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
2241                      ConstantInt::get(CI->getType(), Str.size() + 1));
2242 
2243       // The snprintf result is the unincremented number of bytes in the string.
2244       return ConstantInt::get(CI->getType(), Str.size());
2245     }
2246   }
2247   return nullptr;
2248 }
2249 
2250 Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
2251   if (Value *V = optimizeSnPrintFString(CI, B)) {
2252     return V;
2253   }
2254 
2255   return nullptr;
2256 }
2257 
2258 Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
2259   optimizeErrorReporting(CI, B, 0);
2260 
2261   // All the optimizations depend on the format string.
2262   StringRef FormatStr;
2263   if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
2264     return nullptr;
2265 
2266   // Do not do any of the following transformations if the fprintf return
2267   // value is used; in general, the fprintf return value is not compatible
2268   // with fwrite(), fputc() or fputs().
2269   if (!CI->use_empty())
2270     return nullptr;
2271 
2272   // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
2273   if (CI->getNumArgOperands() == 2) {
2274     // Could handle %% -> % if we cared.
2275     if (FormatStr.find('%') != StringRef::npos)
2276       return nullptr; // We found a format specifier.
2277 
2278     return emitFWrite(
2279         CI->getArgOperand(1),
2280         ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
2281         CI->getArgOperand(0), B, DL, TLI);
2282   }
2283 
2284   // The remaining optimizations require the format string to be "%s" or "%c"
2285   // and have an extra operand.
2286   if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
2287       CI->getNumArgOperands() < 3)
2288     return nullptr;
2289 
2290   // Decode the second character of the format string.
2291   if (FormatStr[1] == 'c') {
2292     // fprintf(F, "%c", chr) --> fputc(chr, F)
2293     if (!CI->getArgOperand(2)->getType()->isIntegerTy())
2294       return nullptr;
2295     return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
2296   }
2297 
2298   if (FormatStr[1] == 's') {
2299     // fprintf(F, "%s", str) --> fputs(str, F)
2300     if (!CI->getArgOperand(2)->getType()->isPointerTy())
2301       return nullptr;
2302     return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
2303   }
2304   return nullptr;
2305 }
2306 
2307 Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
2308   Function *Callee = CI->getCalledFunction();
2309   FunctionType *FT = Callee->getFunctionType();
2310   if (Value *V = optimizeFPrintFString(CI, B)) {
2311     return V;
2312   }
2313 
2314   // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
2315   // floating point arguments.
2316   if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
2317     Module *M = B.GetInsertBlock()->getParent()->getParent();
2318     FunctionCallee FIPrintFFn =
2319         M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
2320     CallInst *New = cast<CallInst>(CI->clone());
2321     New->setCalledFunction(FIPrintFFn);
2322     B.Insert(New);
2323     return New;
2324   }
2325 
2326   // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
2327   // 128-bit floating point arguments.
2328   if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
2329     Module *M = B.GetInsertBlock()->getParent()->getParent();
2330     auto SmallFPrintFFn =
2331         M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
2332                                FT, Callee->getAttributes());
2333     CallInst *New = cast<CallInst>(CI->clone());
2334     New->setCalledFunction(SmallFPrintFFn);
2335     B.Insert(New);
2336     return New;
2337   }
2338 
2339   return nullptr;
2340 }
2341 
2342 Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
2343   optimizeErrorReporting(CI, B, 3);
2344 
2345   // Get the element size and count.
2346   ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
2347   ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2348   if (SizeC && CountC) {
2349     uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
2350 
2351     // If this is writing zero records, remove the call (it's a noop).
2352     if (Bytes == 0)
2353       return ConstantInt::get(CI->getType(), 0);
2354 
2355     // If this is writing one byte, turn it into fputc.
2356     // This optimization is only valid if the return value is unused.
2357     if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
2358       Value *Char = B.CreateLoad(B.getInt8Ty(),
2359                                  castToCStr(CI->getArgOperand(0), B), "char");
2360       Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
2361       return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
2362     }
2363   }
2364 
2365   if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
2366     return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
2367                               CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
2368                               TLI);
2369 
2370   return nullptr;
2371 }
2372 
2373 Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
2374   optimizeErrorReporting(CI, B, 1);
2375 
2376   // Don't rewrite fputs to fwrite when optimizing for size because fwrite
2377   // requires more arguments and thus extra MOVs are required.
2378   if (CI->getFunction()->hasOptSize())
2379     return nullptr;
2380 
2381   // Check if it has any uses.
2382   if (!CI->use_empty()) {
2383     if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
2384       return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
2385                                TLI);
2386     else
2387       // We can't optimize if return value is used.
2388       return nullptr;
2389   }
2390 
2391   // fputs(s,F) --> fwrite(s,strlen(s),1,F)
2392   uint64_t Len = GetStringLength(CI->getArgOperand(0));
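  // GetStringLength returns the string length including the terminating nul,
  // or 0 when the length is not a compile-time constant, so Len - 1 below is
  // strlen(s).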
2393   if (!Len)
2394     return nullptr;
2395 
2396   // Known to have no uses (see above).
2397   return emitFWrite(
2398       CI->getArgOperand(0),
2399       ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
2400       CI->getArgOperand(1), B, DL, TLI);
2401 }
2402 
2403 Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) {
2404   optimizeErrorReporting(CI, B, 1);
2405 
2406   if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
2407     return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
2408                              TLI);
2409 
2410   return nullptr;
2411 }
2412 
2413 Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) {
2414   if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI))
2415     return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);
2416 
2417   return nullptr;
2418 }
2419 
2420 Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) {
2421   if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI))
2422     return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
2423                              CI->getArgOperand(2), B, TLI);
2424 
2425   return nullptr;
2426 }
2427 
2428 Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
2429   if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
2430     return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
2431                              CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
2432                              TLI);
2433 
2434   return nullptr;
2435 }
2436 
2437 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
2438   if (!CI->use_empty())
2439     return nullptr;
2440 
2441   // Check for a constant string.
2442   // puts("") -> putchar('\n')
2443   StringRef Str;
2444   if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
2445     return emitPutChar(B.getInt32('\n'), B, TLI);
2446 
2447   return nullptr;
2448 }
2449 
2450 bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
2451   LibFunc Func;
2452   SmallString<20> FloatFuncName = FuncName;
2453   FloatFuncName += 'f';
2454   if (TLI->getLibFunc(FloatFuncName, Func))
2455     return TLI->has(Func);
2456   return false;
2457 }
2458 
2459 Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
2460                                                       IRBuilder<> &Builder) {
2461   LibFunc Func;
2462   Function *Callee = CI->getCalledFunction();
2463   // Check for string/memory library functions.
2464   if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
2465     // Make sure we never change the calling convention.
2466     assert((ignoreCallingConv(Func) ||
2467             isCallingConvCCompatible(CI)) &&
2468       "Optimizing string/memory libcall would change the calling convention");
2469     switch (Func) {
2470     case LibFunc_strcat:
2471       return optimizeStrCat(CI, Builder);
2472     case LibFunc_strncat:
2473       return optimizeStrNCat(CI, Builder);
2474     case LibFunc_strchr:
2475       return optimizeStrChr(CI, Builder);
2476     case LibFunc_strrchr:
2477       return optimizeStrRChr(CI, Builder);
2478     case LibFunc_strcmp:
2479       return optimizeStrCmp(CI, Builder);
2480     case LibFunc_strncmp:
2481       return optimizeStrNCmp(CI, Builder);
2482     case LibFunc_strcpy:
2483       return optimizeStrCpy(CI, Builder);
2484     case LibFunc_stpcpy:
2485       return optimizeStpCpy(CI, Builder);
2486     case LibFunc_strncpy:
2487       return optimizeStrNCpy(CI, Builder);
2488     case LibFunc_strlen:
2489       return optimizeStrLen(CI, Builder);
2490     case LibFunc_strpbrk:
2491       return optimizeStrPBrk(CI, Builder);
2492     case LibFunc_strtol:
2493     case LibFunc_strtod:
2494     case LibFunc_strtof:
2495     case LibFunc_strtoul:
2496     case LibFunc_strtoll:
2497     case LibFunc_strtold:
2498     case LibFunc_strtoull:
2499       return optimizeStrTo(CI, Builder);
2500     case LibFunc_strspn:
2501       return optimizeStrSpn(CI, Builder);
2502     case LibFunc_strcspn:
2503       return optimizeStrCSpn(CI, Builder);
2504     case LibFunc_strstr:
2505       return optimizeStrStr(CI, Builder);
2506     case LibFunc_memchr:
2507       return optimizeMemChr(CI, Builder);
2508     case LibFunc_memcmp:
2509       return optimizeMemCmp(CI, Builder);
2510     case LibFunc_memcpy:
2511       return optimizeMemCpy(CI, Builder);
2512     case LibFunc_memmove:
2513       return optimizeMemMove(CI, Builder);
2514     case LibFunc_memset:
2515       return optimizeMemSet(CI, Builder);
2516     case LibFunc_realloc:
2517       return optimizeRealloc(CI, Builder);
2518     case LibFunc_wcslen:
2519       return optimizeWcslen(CI, Builder);
2520     default:
2521       break;
2522     }
2523   }
2524   return nullptr;
2525 }
2526 
2527 Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
2528                                                        LibFunc Func,
2529                                                        IRBuilder<> &Builder) {
2530   // Don't optimize calls that require strict floating point semantics.
2531   if (CI->isStrictFP())
2532     return nullptr;
2533 
2534   if (Value *V = optimizeTrigReflections(CI, Func, Builder))
2535     return V;
2536 
2537   switch (Func) {
2538   case LibFunc_sinpif:
2539   case LibFunc_sinpi:
2540   case LibFunc_cospif:
2541   case LibFunc_cospi:
2542     return optimizeSinCosPi(CI, Builder);
2543   case LibFunc_powf:
2544   case LibFunc_pow:
2545   case LibFunc_powl:
2546     return optimizePow(CI, Builder);
2547   case LibFunc_exp2l:
2548   case LibFunc_exp2:
2549   case LibFunc_exp2f:
2550     return optimizeExp2(CI, Builder);
2551   case LibFunc_fabsf:
2552   case LibFunc_fabs:
2553   case LibFunc_fabsl:
2554     return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
2555   case LibFunc_sqrtf:
2556   case LibFunc_sqrt:
2557   case LibFunc_sqrtl:
2558     return optimizeSqrt(CI, Builder);
2559   case LibFunc_log:
2560   case LibFunc_log10:
2561   case LibFunc_log1p:
2562   case LibFunc_log2:
2563   case LibFunc_logb:
2564     return optimizeLog(CI, Builder);
2565   case LibFunc_tan:
2566   case LibFunc_tanf:
2567   case LibFunc_tanl:
2568     return optimizeTan(CI, Builder);
2569   case LibFunc_ceil:
2570     return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
2571   case LibFunc_floor:
2572     return replaceUnaryCall(CI, Builder, Intrinsic::floor);
2573   case LibFunc_round:
2574     return replaceUnaryCall(CI, Builder, Intrinsic::round);
2575   case LibFunc_nearbyint:
2576     return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
2577   case LibFunc_rint:
2578     return replaceUnaryCall(CI, Builder, Intrinsic::rint);
2579   case LibFunc_trunc:
2580     return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
2581   case LibFunc_acos:
2582   case LibFunc_acosh:
2583   case LibFunc_asin:
2584   case LibFunc_asinh:
2585   case LibFunc_atan:
2586   case LibFunc_atanh:
2587   case LibFunc_cbrt:
2588   case LibFunc_cosh:
2589   case LibFunc_exp:
2590   case LibFunc_exp10:
2591   case LibFunc_expm1:
2592   case LibFunc_cos:
2593   case LibFunc_sin:
2594   case LibFunc_sinh:
2595   case LibFunc_tanh:
2596     if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName()))
2597       return optimizeUnaryDoubleFP(CI, Builder, true);
2598     return nullptr;
2599   case LibFunc_copysign:
2600     if (hasFloatVersion(CI->getCalledFunction()->getName()))
2601       return optimizeBinaryDoubleFP(CI, Builder);
2602     return nullptr;
2603   case LibFunc_fminf:
2604   case LibFunc_fmin:
2605   case LibFunc_fminl:
2606   case LibFunc_fmaxf:
2607   case LibFunc_fmax:
2608   case LibFunc_fmaxl:
2609     return optimizeFMinFMax(CI, Builder);
2610   case LibFunc_cabs:
2611   case LibFunc_cabsf:
2612   case LibFunc_cabsl:
2613     return optimizeCAbs(CI, Builder);
2614   default:
2615     return nullptr;
2616   }
2617 }
2618 
2619 Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
2620   // TODO: Split out the code below that operates on FP calls so that
2621   //       we can allow non-FP calls with the StrictFP attribute to be
2622   //       optimized.
2623   if (CI->isNoBuiltin())
2624     return nullptr;
2625 
2626   LibFunc Func;
2627   Function *Callee = CI->getCalledFunction();
2628 
2629   SmallVector<OperandBundleDef, 2> OpBundles;
2630   CI->getOperandBundlesAsDefs(OpBundles);
2631   IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
2632   bool isCallingConvC = isCallingConvCCompatible(CI);
2633 
2634   // Command-line parameter overrides instruction attribute.
2635   // This can't be moved to optimizeFloatingPointLibCall() because it may be
2636   // used by the intrinsic optimizations.
2637   if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
2638     UnsafeFPShrink = EnableUnsafeFPShrink;
2639   else if (isa<FPMathOperator>(CI) && CI->isFast())
2640     UnsafeFPShrink = true;
2641 
2642   // First, check for intrinsics.
2643   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
2644     if (!isCallingConvC)
2645       return nullptr;
2646     // The FP intrinsics have corresponding constrained versions so we don't
2647     // need to check for the StrictFP attribute here.
2648     switch (II->getIntrinsicID()) {
2649     case Intrinsic::pow:
2650       return optimizePow(CI, Builder);
2651     case Intrinsic::exp2:
2652       return optimizeExp2(CI, Builder);
2653     case Intrinsic::log:
2654       return optimizeLog(CI, Builder);
2655     case Intrinsic::sqrt:
2656       return optimizeSqrt(CI, Builder);
2657     // TODO: Use foldMallocMemset() with memset intrinsic.
2658     default:
2659       return nullptr;
2660     }
2661   }
2662 
2663   // Also try to simplify calls to fortified library functions.
2664   if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
2665     // Try to further simplify the result.
2666     CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
2667     if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
2668       // Use an IR Builder from SimplifiedCI if available instead of CI
2669       // to guarantee we reach all uses we might replace later on.
2670       IRBuilder<> TmpBuilder(SimplifiedCI);
2671       if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
2672         // If we were able to further simplify, remove the now redundant call.
2673         SimplifiedCI->replaceAllUsesWith(V);
2674         eraseFromParent(SimplifiedCI);
2675         return V;
2676       }
2677     }
2678     return SimplifiedFortifiedCI;
2679   }
2680 
2681   // Then check for known library functions.
2682   if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
2683     // We never change the calling convention.
2684     if (!ignoreCallingConv(Func) && !isCallingConvC)
2685       return nullptr;
2686     if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
2687       return V;
2688     if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
2689       return V;
2690     switch (Func) {
2691     case LibFunc_ffs:
2692     case LibFunc_ffsl:
2693     case LibFunc_ffsll:
2694       return optimizeFFS(CI, Builder);
2695     case LibFunc_fls:
2696     case LibFunc_flsl:
2697     case LibFunc_flsll:
2698       return optimizeFls(CI, Builder);
2699     case LibFunc_abs:
2700     case LibFunc_labs:
2701     case LibFunc_llabs:
2702       return optimizeAbs(CI, Builder);
2703     case LibFunc_isdigit:
2704       return optimizeIsDigit(CI, Builder);
2705     case LibFunc_isascii:
2706       return optimizeIsAscii(CI, Builder);
2707     case LibFunc_toascii:
2708       return optimizeToAscii(CI, Builder);
2709     case LibFunc_atoi:
2710     case LibFunc_atol:
2711     case LibFunc_atoll:
2712       return optimizeAtoi(CI, Builder);
2713     case LibFunc_strtol:
2714     case LibFunc_strtoll:
2715       return optimizeStrtol(CI, Builder);
2716     case LibFunc_printf:
2717       return optimizePrintF(CI, Builder);
2718     case LibFunc_sprintf:
2719       return optimizeSPrintF(CI, Builder);
2720     case LibFunc_snprintf:
2721       return optimizeSnPrintF(CI, Builder);
2722     case LibFunc_fprintf:
2723       return optimizeFPrintF(CI, Builder);
2724     case LibFunc_fwrite:
2725       return optimizeFWrite(CI, Builder);
2726     case LibFunc_fread:
2727       return optimizeFRead(CI, Builder);
2728     case LibFunc_fputs:
2729       return optimizeFPuts(CI, Builder);
2730     case LibFunc_fgets:
2731       return optimizeFGets(CI, Builder);
2732     case LibFunc_fputc:
2733       return optimizeFPutc(CI, Builder);
2734     case LibFunc_fgetc:
2735       return optimizeFGetc(CI, Builder);
2736     case LibFunc_puts:
2737       return optimizePuts(CI, Builder);
2738     case LibFunc_perror:
2739       return optimizeErrorReporting(CI, Builder);
2740     case LibFunc_vfprintf:
2741     case LibFunc_fiprintf:
2742       return optimizeErrorReporting(CI, Builder, 0);
2743     default:
2744       return nullptr;
2745     }
2746   }
2747   return nullptr;
2748 }
2749 
2750 LibCallSimplifier::LibCallSimplifier(
2751     const DataLayout &DL, const TargetLibraryInfo *TLI,
2752     OptimizationRemarkEmitter &ORE,
2753     function_ref<void(Instruction *, Value *)> Replacer,
2754     function_ref<void(Instruction *)> Eraser)
2755     : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
2756       UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
2757 
2758 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
2759   // Indirect through the replacer used in this instance.
2760   Replacer(I, With);
2761 }
2762 
2763 void LibCallSimplifier::eraseFromParent(Instruction *I) {
2764   Eraser(I);
2765 }
2766 
2767 // TODO:
2768 //   Additional cases that we need to add to this file:
2769 //
2770 // cbrt:
2771 //   * cbrt(expN(x))  -> expN(x/3)
2772 //   * cbrt(sqrt(x))  -> pow(x,1/6)
2773 //   * cbrt(cbrt(x))  -> pow(x,1/9)
2774 //
2775 // exp, expf, expl:
2776 //   * exp(log(x))  -> x
2777 //
2778 // log, logf, logl:
2779 //   * log(exp(x))   -> x
2780 //   * log(exp(y))   -> y*log(e)
2781 //   * log(exp10(y)) -> y*log(10)
2782 //   * log(sqrt(x))  -> 0.5*log(x)
2783 //
2784 // pow, powf, powl:
2785 //   * pow(sqrt(x),y) -> pow(x,y*0.5)
2786 //   * pow(pow(x,y),z)-> pow(x,y*z)
2787 //
2788 // signbit:
2789 //   * signbit(cnst) -> cnst'
2790 //   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
2791 //
2792 // sqrt, sqrtf, sqrtl:
2793 //   * sqrt(expN(x))  -> expN(x*0.5)
2794 //   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
2795 //   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
2796 //
2797 
2798 //===----------------------------------------------------------------------===//
2799 // Fortified Library Call Optimizations
2800 //===----------------------------------------------------------------------===//
2801 
2802 bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
2803                                                          unsigned ObjSizeOp,
2804                                                          unsigned SizeOp,
2805                                                          bool isString) {
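  // If the size operand and the object-size operand are the same value, e.g.
  // __memcpy_chk(dst, src, n, n), the write can never exceed the reported
  // object size, so the check may be dropped.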
2806   if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
2807     return true;
2808   if (ConstantInt *ObjSizeCI =
2809           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
2810     if (ObjSizeCI->isMinusOne())
2811       return true;
2812     // If the object size wasn't -1 (unknown), bail out if we were asked to.
2813     if (OnlyLowerUnknownSize)
2814       return false;
2815     if (isString) {
2816       uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
2817       // If the length is 0 we don't know how long it is and so we can't
2818       // remove the check.
      if (Len == 0)
        return false;
      return ObjSizeCI->getZExtValue() >= Len;
    }
    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
      return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
  }
  return false;
}

Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                     IRBuilder<> &B) {
  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                   CI->getArgOperand(2));
    return CI->getArgOperand(0);
  }
  return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                      IRBuilder<> &B) {
  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
    B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                    CI->getArgOperand(2));
    return CI->getArgOperand(0);
  }
  return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
                                                     IRBuilder<> &B) {
  // TODO: Try foldMallocMemset() here.

  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
    return CI->getArgOperand(0);
  }
  return nullptr;
}

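// __str[p]cpy_chk(dst, src, objsize) is handled in three steps, tried in
// order below:
//   1. __stpcpy_chk(x, x, sz)             -> x + strlen(x)
//   2. a provably safe copy               -> plain st[rp]cpy
//   3. src with known constant length Len -> __memcpy_chk(dst, src, Len, objsize)
// In the last case, __stpcpy_chk must still produce the end pointer,
// dst + Len - 1 (Len includes the terminating NUL here).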
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
                                                      IRBuilder<> &B,
                                                      LibFunc Func) {
  Function *Callee = CI->getCalledFunction();
  StringRef Name = Callee->getName();
  const DataLayout &DL = CI->getModule()->getDataLayout();
  Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
        *ObjSize = CI->getArgOperand(2);

  // __stpcpy_chk(x,x,...)  -> x+strlen(x)
  if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
    Value *StrLen = emitStrLen(Src, B, DL, TLI);
    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
  }

  // If a) we don't have any length information, or b) we know this will
  // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
  // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
  // TODO: It might be nice to get a maximum length out of the possible
  // string lengths for varying strings.
  if (isFortifiedCallFoldable(CI, 2, 1, true))
    return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));

  if (OnlyLowerUnknownSize)
    return nullptr;

  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
  uint64_t Len = GetStringLength(Src);
  if (Len == 0)
    return nullptr;

  Type *SizeTTy = DL.getIntPtrType(CI->getContext());
  Value *LenV = ConstantInt::get(SizeTTy, Len);
  Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
  // If the function was __stpcpy_chk and we were able to fold it into
  // __memcpy_chk, we still need to return the correct end pointer.
  if (Ret && Func == LibFunc_stpcpy_chk)
    return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
  return Ret;
}

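// __str[p]ncpy_chk(dst, src, n, objsize) -> st[rp]ncpy(dst, src, n) whenever
// the object-size check is provably redundant (see isFortifiedCallFoldable).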
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
                                                       IRBuilder<> &B,
                                                       LibFunc Func) {
  Function *Callee = CI->getCalledFunction();
  StringRef Name = Callee->getName();
  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
    Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
                             CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
    return Ret;
  }
  return nullptr;
}

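// Entry point for the fortified simplifier. Fortified calls typically come
// from code built with _FORTIFY_SOURCE, where the C library maps, e.g.,
//   memcpy(d, s, n)
// to something along the lines of
//   __builtin___memcpy_chk(d, s, n, __builtin_object_size(d, 0))
// so every call reaching this point carries an object-size operand that may
// turn out to be provably redundant.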
Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
  // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
  // Some clang users checked for _chk libcall availability using:
  //   __has_builtin(__builtin___memcpy_chk)
  // When compiling with -fno-builtin, this is always true.
  // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
  // end up with fortified libcalls, which isn't acceptable in a freestanding
  // environment which only provides their non-fortified counterparts.
  //
  // Until we change clang and/or teach external users to check for availability
  // differently, disregard the "nobuiltin" attribute and TLI::has.
  //
  // PR23093.

  LibFunc Func;
  Function *Callee = CI->getCalledFunction();

  SmallVector<OperandBundleDef, 2> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles);
  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
  bool isCallingConvC = isCallingConvCCompatible(CI);

  // First, check that this is a known library function and that the prototype
  // is correct.
  if (!TLI->getLibFunc(*Callee, Func))
    return nullptr;

  // We never change the calling convention.
  if (!ignoreCallingConv(Func) && !isCallingConvC)
    return nullptr;

  switch (Func) {
  case LibFunc_memcpy_chk:
    return optimizeMemCpyChk(CI, Builder);
  case LibFunc_memmove_chk:
    return optimizeMemMoveChk(CI, Builder);
  case LibFunc_memset_chk:
    return optimizeMemSetChk(CI, Builder);
  case LibFunc_stpcpy_chk:
  case LibFunc_strcpy_chk:
    return optimizeStrpCpyChk(CI, Builder, Func);
  case LibFunc_stpncpy_chk:
  case LibFunc_strncpy_chk:
    return optimizeStrpNCpyChk(CI, Builder, Func);
  default:
    break;
  }
  return nullptr;
}

FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
    const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
    : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}