//===- AMDGPULibCalls.cpp -------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file does AMD library function optimizations.
//
//===----------------------------------------------------------------------===//
142cab237bSDimitry Andric 
152cab237bSDimitry Andric #define DEBUG_TYPE "amdgpu-simplifylib"
162cab237bSDimitry Andric 
172cab237bSDimitry Andric #include "AMDGPU.h"
182cab237bSDimitry Andric #include "AMDGPULibFunc.h"
192cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
202cab237bSDimitry Andric #include "llvm/Analysis/Loads.h"
212cab237bSDimitry Andric #include "llvm/ADT/StringSet.h"
222cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
232cab237bSDimitry Andric #include "llvm/IR/Constants.h"
242cab237bSDimitry Andric #include "llvm/IR/DerivedTypes.h"
252cab237bSDimitry Andric #include "llvm/IR/Instructions.h"
262cab237bSDimitry Andric #include "llvm/IR/IRBuilder.h"
272cab237bSDimitry Andric #include "llvm/IR/Function.h"
282cab237bSDimitry Andric #include "llvm/IR/LLVMContext.h"
292cab237bSDimitry Andric #include "llvm/IR/Module.h"
302cab237bSDimitry Andric #include "llvm/IR/ValueSymbolTable.h"
312cab237bSDimitry Andric #include "llvm/Support/Debug.h"
322cab237bSDimitry Andric #include "llvm/Support/raw_ostream.h"
332cab237bSDimitry Andric #include "llvm/Target/TargetOptions.h"
342cab237bSDimitry Andric #include <vector>
352cab237bSDimitry Andric #include <cmath>
362cab237bSDimitry Andric 
372cab237bSDimitry Andric using namespace llvm;
382cab237bSDimitry Andric 
392cab237bSDimitry Andric static cl::opt<bool> EnablePreLink("amdgpu-prelink",
402cab237bSDimitry Andric   cl::desc("Enable pre-link mode optimizations"),
412cab237bSDimitry Andric   cl::init(false),
422cab237bSDimitry Andric   cl::Hidden);
432cab237bSDimitry Andric 
442cab237bSDimitry Andric static cl::list<std::string> UseNative("amdgpu-use-native",
452cab237bSDimitry Andric   cl::desc("Comma separated list of functions to replace with native, or all"),
462cab237bSDimitry Andric   cl::CommaSeparated, cl::ValueOptional,
472cab237bSDimitry Andric   cl::Hidden);
482cab237bSDimitry Andric 
// Basic mathematical constants, spelled with more digits than a double holds.
// pi
#define MATH_PI        3.14159265358979323846264338327950288419716939937511
// e
#define MATH_E         2.71828182845904523536028747135266249775724709369996
// sqrt(2)
#define MATH_SQRT2     1.41421356237309504880168872420969807856967187537695

// Logarithm conversion factors.
// log2(e)
#define MATH_LOG2E     1.4426950408889634073599246810018921374266459541529859
// log10(e)
#define MATH_LOG10E    0.4342944819032518276511289189166050822943970058036665
// log2(10)
#define MATH_LOG2_10   3.3219280948873623478703194294893901758648313930245806
// 1 / log2(10)
#define MATH_RLOG2_10  0.3010299956639811952137388947244930267681898814621085
// 1 / M_LOG2E_F = 1 / log2(e)
#define MATH_RLOG2_E   0.6931471805599453094172321214581765680755001343602552
612cab237bSDimitry Andric 
622cab237bSDimitry Andric namespace llvm {
632cab237bSDimitry Andric 
642cab237bSDimitry Andric class AMDGPULibCalls {
652cab237bSDimitry Andric private:
662cab237bSDimitry Andric 
672cab237bSDimitry Andric   typedef llvm::AMDGPULibFunc FuncInfo;
682cab237bSDimitry Andric 
692cab237bSDimitry Andric   // -fuse-native.
702cab237bSDimitry Andric   bool AllNative = false;
712cab237bSDimitry Andric 
722cab237bSDimitry Andric   bool useNativeFunc(const StringRef F) const;
732cab237bSDimitry Andric 
742cab237bSDimitry Andric   // Return a pointer (pointer expr) to the function if function defintion with
752cab237bSDimitry Andric   // "FuncName" exists. It may create a new function prototype in pre-link mode.
762cab237bSDimitry Andric   Constant *getFunction(Module *M, const FuncInfo& fInfo);
772cab237bSDimitry Andric 
782cab237bSDimitry Andric   // Replace a normal function with its native version.
792cab237bSDimitry Andric   bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
802cab237bSDimitry Andric 
812cab237bSDimitry Andric   bool parseFunctionName(const StringRef& FMangledName,
822cab237bSDimitry Andric                          FuncInfo *FInfo=nullptr /*out*/);
832cab237bSDimitry Andric 
842cab237bSDimitry Andric   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
852cab237bSDimitry Andric 
862cab237bSDimitry Andric   /* Specialized optimizations */
872cab237bSDimitry Andric 
882cab237bSDimitry Andric   // recip (half or native)
892cab237bSDimitry Andric   bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
902cab237bSDimitry Andric 
912cab237bSDimitry Andric   // divide (half or native)
922cab237bSDimitry Andric   bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
932cab237bSDimitry Andric 
942cab237bSDimitry Andric   // pow/powr/pown
952cab237bSDimitry Andric   bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
962cab237bSDimitry Andric 
972cab237bSDimitry Andric   // rootn
982cab237bSDimitry Andric   bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
992cab237bSDimitry Andric 
1002cab237bSDimitry Andric   // fma/mad
1012cab237bSDimitry Andric   bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1022cab237bSDimitry Andric 
1032cab237bSDimitry Andric   // -fuse-native for sincos
1042cab237bSDimitry Andric   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
1052cab237bSDimitry Andric 
1062cab237bSDimitry Andric   // evaluate calls if calls' arguments are constants.
1072cab237bSDimitry Andric   bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
1082cab237bSDimitry Andric     double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
1092cab237bSDimitry Andric   bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
1102cab237bSDimitry Andric 
1112cab237bSDimitry Andric   // exp
1122cab237bSDimitry Andric   bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1132cab237bSDimitry Andric 
1142cab237bSDimitry Andric   // exp2
1152cab237bSDimitry Andric   bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1162cab237bSDimitry Andric 
1172cab237bSDimitry Andric   // exp10
1182cab237bSDimitry Andric   bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1192cab237bSDimitry Andric 
1202cab237bSDimitry Andric   // log
1212cab237bSDimitry Andric   bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1222cab237bSDimitry Andric 
1232cab237bSDimitry Andric   // log2
1242cab237bSDimitry Andric   bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1252cab237bSDimitry Andric 
1262cab237bSDimitry Andric   // log10
1272cab237bSDimitry Andric   bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1282cab237bSDimitry Andric 
1292cab237bSDimitry Andric   // sqrt
1302cab237bSDimitry Andric   bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1312cab237bSDimitry Andric 
1322cab237bSDimitry Andric   // sin/cos
1332cab237bSDimitry Andric   bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
1342cab237bSDimitry Andric 
1352cab237bSDimitry Andric   // __read_pipe/__write_pipe
1362cab237bSDimitry Andric   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
1372cab237bSDimitry Andric 
1382cab237bSDimitry Andric   // Get insertion point at entry.
1392cab237bSDimitry Andric   BasicBlock::iterator getEntryIns(CallInst * UI);
1402cab237bSDimitry Andric   // Insert an Alloc instruction.
1412cab237bSDimitry Andric   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
1422cab237bSDimitry Andric   // Get a scalar native builtin signle argument FP function
1432cab237bSDimitry Andric   Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
1442cab237bSDimitry Andric 
1452cab237bSDimitry Andric protected:
1462cab237bSDimitry Andric   CallInst *CI;
1472cab237bSDimitry Andric 
1482cab237bSDimitry Andric   bool isUnsafeMath(const CallInst *CI) const;
1492cab237bSDimitry Andric 
replaceCall(Value * With)1502cab237bSDimitry Andric   void replaceCall(Value *With) {
1512cab237bSDimitry Andric     CI->replaceAllUsesWith(With);
1522cab237bSDimitry Andric     CI->eraseFromParent();
1532cab237bSDimitry Andric   }
1542cab237bSDimitry Andric 
1552cab237bSDimitry Andric public:
1562cab237bSDimitry Andric   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
1572cab237bSDimitry Andric 
1582cab237bSDimitry Andric   void initNativeFuncs();
1592cab237bSDimitry Andric 
1602cab237bSDimitry Andric   // Replace a normal math function call with that native version
1612cab237bSDimitry Andric   bool useNative(CallInst *CI);
1622cab237bSDimitry Andric };
1632cab237bSDimitry Andric 
1642cab237bSDimitry Andric } // end llvm namespace
1652cab237bSDimitry Andric 
1662cab237bSDimitry Andric namespace {
1672cab237bSDimitry Andric 
1682cab237bSDimitry Andric   class AMDGPUSimplifyLibCalls : public FunctionPass {
1692cab237bSDimitry Andric 
1702cab237bSDimitry Andric   AMDGPULibCalls Simplifier;
1712cab237bSDimitry Andric 
1722cab237bSDimitry Andric   const TargetOptions Options;
1732cab237bSDimitry Andric 
1742cab237bSDimitry Andric   public:
1752cab237bSDimitry Andric     static char ID; // Pass identification
1762cab237bSDimitry Andric 
AMDGPUSimplifyLibCalls(const TargetOptions & Opt=TargetOptions ())1772cab237bSDimitry Andric     AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
1782cab237bSDimitry Andric       : FunctionPass(ID), Options(Opt) {
1792cab237bSDimitry Andric       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
1802cab237bSDimitry Andric     }
1812cab237bSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const1822cab237bSDimitry Andric     void getAnalysisUsage(AnalysisUsage &AU) const override {
1832cab237bSDimitry Andric       AU.addRequired<AAResultsWrapperPass>();
1842cab237bSDimitry Andric     }
1852cab237bSDimitry Andric 
1862cab237bSDimitry Andric     bool runOnFunction(Function &M) override;
1872cab237bSDimitry Andric   };
1882cab237bSDimitry Andric 
1892cab237bSDimitry Andric   class AMDGPUUseNativeCalls : public FunctionPass {
1902cab237bSDimitry Andric 
1912cab237bSDimitry Andric   AMDGPULibCalls Simplifier;
1922cab237bSDimitry Andric 
1932cab237bSDimitry Andric   public:
1942cab237bSDimitry Andric     static char ID; // Pass identification
1952cab237bSDimitry Andric 
AMDGPUUseNativeCalls()1962cab237bSDimitry Andric     AMDGPUUseNativeCalls() : FunctionPass(ID) {
1972cab237bSDimitry Andric       initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
1982cab237bSDimitry Andric       Simplifier.initNativeFuncs();
1992cab237bSDimitry Andric     }
2002cab237bSDimitry Andric 
2012cab237bSDimitry Andric     bool runOnFunction(Function &F) override;
2022cab237bSDimitry Andric   };
2032cab237bSDimitry Andric 
2042cab237bSDimitry Andric } // end anonymous namespace.
2052cab237bSDimitry Andric 
2062cab237bSDimitry Andric char AMDGPUSimplifyLibCalls::ID = 0;
2072cab237bSDimitry Andric char AMDGPUUseNativeCalls::ID = 0;
2082cab237bSDimitry Andric 
2092cab237bSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
2102cab237bSDimitry Andric                       "Simplify well-known AMD library calls", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)2112cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2122cab237bSDimitry Andric INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
2132cab237bSDimitry Andric                     "Simplify well-known AMD library calls", false, false)
2142cab237bSDimitry Andric 
2152cab237bSDimitry Andric INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
2162cab237bSDimitry Andric                 "Replace builtin math calls with that native versions.",
2172cab237bSDimitry Andric                 false, false)
2182cab237bSDimitry Andric 
2192cab237bSDimitry Andric template <typename IRB>
2202cab237bSDimitry Andric static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
2212cab237bSDimitry Andric                               const Twine &Name = "") {
2222cab237bSDimitry Andric   CallInst *R = B.CreateCall(Callee, Arg, Name);
2232cab237bSDimitry Andric   if (Function* F = dyn_cast<Function>(Callee))
2242cab237bSDimitry Andric     R->setCallingConv(F->getCallingConv());
2252cab237bSDimitry Andric   return R;
2262cab237bSDimitry Andric }
2272cab237bSDimitry Andric 
2282cab237bSDimitry Andric template <typename IRB>
CreateCallEx2(IRB & B,Value * Callee,Value * Arg1,Value * Arg2,const Twine & Name="")2292cab237bSDimitry Andric static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
2302cab237bSDimitry Andric                                const Twine &Name = "") {
2312cab237bSDimitry Andric   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
2322cab237bSDimitry Andric   if (Function* F = dyn_cast<Function>(Callee))
2332cab237bSDimitry Andric     R->setCallingConv(F->getCallingConv());
2342cab237bSDimitry Andric   return R;
2352cab237bSDimitry Andric }
2362cab237bSDimitry Andric 
//  Data structures for table-driven optimizations.
//  FuncTbl works for both f32 and f64 functions with 1 input argument

// One {result, input} row of a table. The member order matters: the tables
// in this file are brace-initialized positionally.
struct TableEntry {
  double result; // first initializer of each row
  double input;  // second initializer of each row
};
2442cab237bSDimitry Andric 
2452cab237bSDimitry Andric /* a list of {result, input} */
2462cab237bSDimitry Andric static const TableEntry tbl_acos[] = {
2472cab237bSDimitry Andric   {MATH_PI/2.0, 0.0},
2482cab237bSDimitry Andric   {MATH_PI/2.0, -0.0},
2492cab237bSDimitry Andric   {0.0, 1.0},
2502cab237bSDimitry Andric   {MATH_PI, -1.0}
2512cab237bSDimitry Andric };
2522cab237bSDimitry Andric static const TableEntry tbl_acosh[] = {
2532cab237bSDimitry Andric   {0.0, 1.0}
2542cab237bSDimitry Andric };
2552cab237bSDimitry Andric static const TableEntry tbl_acospi[] = {
2562cab237bSDimitry Andric   {0.5, 0.0},
2572cab237bSDimitry Andric   {0.5, -0.0},
2582cab237bSDimitry Andric   {0.0, 1.0},
2592cab237bSDimitry Andric   {1.0, -1.0}
2602cab237bSDimitry Andric };
2612cab237bSDimitry Andric static const TableEntry tbl_asin[] = {
2622cab237bSDimitry Andric   {0.0, 0.0},
2632cab237bSDimitry Andric   {-0.0, -0.0},
2642cab237bSDimitry Andric   {MATH_PI/2.0, 1.0},
2652cab237bSDimitry Andric   {-MATH_PI/2.0, -1.0}
2662cab237bSDimitry Andric };
2672cab237bSDimitry Andric static const TableEntry tbl_asinh[] = {
2682cab237bSDimitry Andric   {0.0, 0.0},
2692cab237bSDimitry Andric   {-0.0, -0.0}
2702cab237bSDimitry Andric };
2712cab237bSDimitry Andric static const TableEntry tbl_asinpi[] = {
2722cab237bSDimitry Andric   {0.0, 0.0},
2732cab237bSDimitry Andric   {-0.0, -0.0},
2742cab237bSDimitry Andric   {0.5, 1.0},
2752cab237bSDimitry Andric   {-0.5, -1.0}
2762cab237bSDimitry Andric };
2772cab237bSDimitry Andric static const TableEntry tbl_atan[] = {
2782cab237bSDimitry Andric   {0.0, 0.0},
2792cab237bSDimitry Andric   {-0.0, -0.0},
2802cab237bSDimitry Andric   {MATH_PI/4.0, 1.0},
2812cab237bSDimitry Andric   {-MATH_PI/4.0, -1.0}
2822cab237bSDimitry Andric };
2832cab237bSDimitry Andric static const TableEntry tbl_atanh[] = {
2842cab237bSDimitry Andric   {0.0, 0.0},
2852cab237bSDimitry Andric   {-0.0, -0.0}
2862cab237bSDimitry Andric };
2872cab237bSDimitry Andric static const TableEntry tbl_atanpi[] = {
2882cab237bSDimitry Andric   {0.0, 0.0},
2892cab237bSDimitry Andric   {-0.0, -0.0},
2902cab237bSDimitry Andric   {0.25, 1.0},
2912cab237bSDimitry Andric   {-0.25, -1.0}
2922cab237bSDimitry Andric };
2932cab237bSDimitry Andric static const TableEntry tbl_cbrt[] = {
2942cab237bSDimitry Andric   {0.0, 0.0},
2952cab237bSDimitry Andric   {-0.0, -0.0},
2962cab237bSDimitry Andric   {1.0, 1.0},
2972cab237bSDimitry Andric   {-1.0, -1.0},
2982cab237bSDimitry Andric };
2992cab237bSDimitry Andric static const TableEntry tbl_cos[] = {
3002cab237bSDimitry Andric   {1.0, 0.0},
3012cab237bSDimitry Andric   {1.0, -0.0}
3022cab237bSDimitry Andric };
3032cab237bSDimitry Andric static const TableEntry tbl_cosh[] = {
3042cab237bSDimitry Andric   {1.0, 0.0},
3052cab237bSDimitry Andric   {1.0, -0.0}
3062cab237bSDimitry Andric };
3072cab237bSDimitry Andric static const TableEntry tbl_cospi[] = {
3082cab237bSDimitry Andric   {1.0, 0.0},
3092cab237bSDimitry Andric   {1.0, -0.0}
3102cab237bSDimitry Andric };
3112cab237bSDimitry Andric static const TableEntry tbl_erfc[] = {
3122cab237bSDimitry Andric   {1.0, 0.0},
3132cab237bSDimitry Andric   {1.0, -0.0}
3142cab237bSDimitry Andric };
3152cab237bSDimitry Andric static const TableEntry tbl_erf[] = {
3162cab237bSDimitry Andric   {0.0, 0.0},
3172cab237bSDimitry Andric   {-0.0, -0.0}
3182cab237bSDimitry Andric };
3192cab237bSDimitry Andric static const TableEntry tbl_exp[] = {
3202cab237bSDimitry Andric   {1.0, 0.0},
3212cab237bSDimitry Andric   {1.0, -0.0},
3222cab237bSDimitry Andric   {MATH_E, 1.0}
3232cab237bSDimitry Andric };
3242cab237bSDimitry Andric static const TableEntry tbl_exp2[] = {
3252cab237bSDimitry Andric   {1.0, 0.0},
3262cab237bSDimitry Andric   {1.0, -0.0},
3272cab237bSDimitry Andric   {2.0, 1.0}
3282cab237bSDimitry Andric };
3292cab237bSDimitry Andric static const TableEntry tbl_exp10[] = {
3302cab237bSDimitry Andric   {1.0, 0.0},
3312cab237bSDimitry Andric   {1.0, -0.0},
3322cab237bSDimitry Andric   {10.0, 1.0}
3332cab237bSDimitry Andric };
3342cab237bSDimitry Andric static const TableEntry tbl_expm1[] = {
3352cab237bSDimitry Andric   {0.0, 0.0},
3362cab237bSDimitry Andric   {-0.0, -0.0}
3372cab237bSDimitry Andric };
3382cab237bSDimitry Andric static const TableEntry tbl_log[] = {
3392cab237bSDimitry Andric   {0.0, 1.0},
3402cab237bSDimitry Andric   {1.0, MATH_E}
3412cab237bSDimitry Andric };
3422cab237bSDimitry Andric static const TableEntry tbl_log2[] = {
3432cab237bSDimitry Andric   {0.0, 1.0},
3442cab237bSDimitry Andric   {1.0, 2.0}
3452cab237bSDimitry Andric };
3462cab237bSDimitry Andric static const TableEntry tbl_log10[] = {
3472cab237bSDimitry Andric   {0.0, 1.0},
3482cab237bSDimitry Andric   {1.0, 10.0}
3492cab237bSDimitry Andric };
3502cab237bSDimitry Andric static const TableEntry tbl_rsqrt[] = {
3512cab237bSDimitry Andric   {1.0, 1.0},
3522cab237bSDimitry Andric   {1.0/MATH_SQRT2, 2.0}
3532cab237bSDimitry Andric };
3542cab237bSDimitry Andric static const TableEntry tbl_sin[] = {
3552cab237bSDimitry Andric   {0.0, 0.0},
3562cab237bSDimitry Andric   {-0.0, -0.0}
3572cab237bSDimitry Andric };
3582cab237bSDimitry Andric static const TableEntry tbl_sinh[] = {
3592cab237bSDimitry Andric   {0.0, 0.0},
3602cab237bSDimitry Andric   {-0.0, -0.0}
3612cab237bSDimitry Andric };
3622cab237bSDimitry Andric static const TableEntry tbl_sinpi[] = {
3632cab237bSDimitry Andric   {0.0, 0.0},
3642cab237bSDimitry Andric   {-0.0, -0.0}
3652cab237bSDimitry Andric };
3662cab237bSDimitry Andric static const TableEntry tbl_sqrt[] = {
3672cab237bSDimitry Andric   {0.0, 0.0},
3682cab237bSDimitry Andric   {1.0, 1.0},
3692cab237bSDimitry Andric   {MATH_SQRT2, 2.0}
3702cab237bSDimitry Andric };
3712cab237bSDimitry Andric static const TableEntry tbl_tan[] = {
3722cab237bSDimitry Andric   {0.0, 0.0},
3732cab237bSDimitry Andric   {-0.0, -0.0}
3742cab237bSDimitry Andric };
3752cab237bSDimitry Andric static const TableEntry tbl_tanh[] = {
3762cab237bSDimitry Andric   {0.0, 0.0},
3772cab237bSDimitry Andric   {-0.0, -0.0}
3782cab237bSDimitry Andric };
3792cab237bSDimitry Andric static const TableEntry tbl_tanpi[] = {
3802cab237bSDimitry Andric   {0.0, 0.0},
3812cab237bSDimitry Andric   {-0.0, -0.0}
3822cab237bSDimitry Andric };
3832cab237bSDimitry Andric static const TableEntry tbl_tgamma[] = {
3842cab237bSDimitry Andric   {1.0, 1.0},
3852cab237bSDimitry Andric   {1.0, 2.0},
3862cab237bSDimitry Andric   {2.0, 3.0},
3872cab237bSDimitry Andric   {6.0, 4.0}
3882cab237bSDimitry Andric };
3892cab237bSDimitry Andric 
HasNative(AMDGPULibFunc::EFuncId id)3902cab237bSDimitry Andric static bool HasNative(AMDGPULibFunc::EFuncId id) {
3912cab237bSDimitry Andric   switch(id) {
3922cab237bSDimitry Andric   case AMDGPULibFunc::EI_DIVIDE:
3932cab237bSDimitry Andric   case AMDGPULibFunc::EI_COS:
3942cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP:
3952cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP2:
3962cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP10:
3972cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG:
3982cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG2:
3992cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG10:
4002cab237bSDimitry Andric   case AMDGPULibFunc::EI_POWR:
4012cab237bSDimitry Andric   case AMDGPULibFunc::EI_RECIP:
4022cab237bSDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
4032cab237bSDimitry Andric   case AMDGPULibFunc::EI_SIN:
4042cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
4052cab237bSDimitry Andric   case AMDGPULibFunc::EI_SQRT:
4062cab237bSDimitry Andric   case AMDGPULibFunc::EI_TAN:
4072cab237bSDimitry Andric     return true;
4082cab237bSDimitry Andric   default:;
4092cab237bSDimitry Andric   }
4102cab237bSDimitry Andric   return false;
4112cab237bSDimitry Andric }
4122cab237bSDimitry Andric 
4132cab237bSDimitry Andric struct TableRef {
4142cab237bSDimitry Andric   size_t size;
4152cab237bSDimitry Andric   const TableEntry *table; // variable size: from 0 to (size - 1)
4162cab237bSDimitry Andric 
TableRefTableRef4172cab237bSDimitry Andric   TableRef() : size(0), table(nullptr) {}
4182cab237bSDimitry Andric 
4192cab237bSDimitry Andric   template <size_t N>
TableRefTableRef4202cab237bSDimitry Andric   TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
4212cab237bSDimitry Andric };
4222cab237bSDimitry Andric 
getOptTable(AMDGPULibFunc::EFuncId id)4232cab237bSDimitry Andric static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
4242cab237bSDimitry Andric   switch(id) {
4252cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
4262cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
4272cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
4282cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
4292cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
4302cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
4312cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
4322cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
4332cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
4342cab237bSDimitry Andric   case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
4352cab237bSDimitry Andric   case AMDGPULibFunc::EI_NCOS:
4362cab237bSDimitry Andric   case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
4372cab237bSDimitry Andric   case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
4382cab237bSDimitry Andric   case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
4392cab237bSDimitry Andric   case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
4402cab237bSDimitry Andric   case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
4412cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
4422cab237bSDimitry Andric   case AMDGPULibFunc::EI_NEXP2:
4432cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
4442cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
4452cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
4462cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
4472cab237bSDimitry Andric   case AMDGPULibFunc::EI_NLOG2:
4482cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
4492cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
4502cab237bSDimitry Andric   case AMDGPULibFunc::EI_NRSQRT:
4512cab237bSDimitry Andric   case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
4522cab237bSDimitry Andric   case AMDGPULibFunc::EI_NSIN:
4532cab237bSDimitry Andric   case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
4542cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
4552cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
4562cab237bSDimitry Andric   case AMDGPULibFunc::EI_NSQRT:
4572cab237bSDimitry Andric   case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
4582cab237bSDimitry Andric   case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
4592cab237bSDimitry Andric   case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
4602cab237bSDimitry Andric   case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
4612cab237bSDimitry Andric   case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
4622cab237bSDimitry Andric   default:;
4632cab237bSDimitry Andric   }
4642cab237bSDimitry Andric   return TableRef();
4652cab237bSDimitry Andric }
4662cab237bSDimitry Andric 
getVecSize(const AMDGPULibFunc & FInfo)4672cab237bSDimitry Andric static inline int getVecSize(const AMDGPULibFunc& FInfo) {
4682cab237bSDimitry Andric   return FInfo.getLeads()[0].VectorSize;
4692cab237bSDimitry Andric }
4702cab237bSDimitry Andric 
// Argument type recorded for the first lead of FInfo, converted to EType.
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
  const auto &FirstLead = FInfo.getLeads()[0];
  return static_cast<AMDGPULibFunc::EType>(FirstLead.ArgType);
}
4742cab237bSDimitry Andric 
getFunction(Module * M,const FuncInfo & fInfo)4752cab237bSDimitry Andric Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
4762cab237bSDimitry Andric   // If we are doing PreLinkOpt, the function is external. So it is safe to
4772cab237bSDimitry Andric   // use getOrInsertFunction() at this stage.
4782cab237bSDimitry Andric 
4792cab237bSDimitry Andric   return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4802cab237bSDimitry Andric                        : AMDGPULibFunc::getFunction(M, fInfo);
4812cab237bSDimitry Andric }
4822cab237bSDimitry Andric 
parseFunctionName(const StringRef & FMangledName,FuncInfo * FInfo)4832cab237bSDimitry Andric bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
4842cab237bSDimitry Andric                                     FuncInfo *FInfo) {
4852cab237bSDimitry Andric   return AMDGPULibFunc::parse(FMangledName, *FInfo);
4862cab237bSDimitry Andric }
4872cab237bSDimitry Andric 
isUnsafeMath(const CallInst * CI) const4882cab237bSDimitry Andric bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
4892cab237bSDimitry Andric   if (auto Op = dyn_cast<FPMathOperator>(CI))
4902cab237bSDimitry Andric     if (Op->isFast())
4912cab237bSDimitry Andric       return true;
4922cab237bSDimitry Andric   const Function *F = CI->getParent()->getParent();
4932cab237bSDimitry Andric   Attribute Attr = F->getFnAttribute("unsafe-fp-math");
4942cab237bSDimitry Andric   return Attr.getValueAsString() == "true";
4952cab237bSDimitry Andric }
4962cab237bSDimitry Andric 
useNativeFunc(const StringRef F) const4972cab237bSDimitry Andric bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
4982cab237bSDimitry Andric   return AllNative ||
4992cab237bSDimitry Andric          std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
5002cab237bSDimitry Andric }
5012cab237bSDimitry Andric 
initNativeFuncs()5022cab237bSDimitry Andric void AMDGPULibCalls::initNativeFuncs() {
5032cab237bSDimitry Andric   AllNative = useNativeFunc("all") ||
5042cab237bSDimitry Andric               (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
5052cab237bSDimitry Andric                UseNative.begin()->empty());
5062cab237bSDimitry Andric }
5072cab237bSDimitry Andric 
sincosUseNative(CallInst * aCI,const FuncInfo & FInfo)5082cab237bSDimitry Andric bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
5092cab237bSDimitry Andric   bool native_sin = useNativeFunc("sin");
5102cab237bSDimitry Andric   bool native_cos = useNativeFunc("cos");
5112cab237bSDimitry Andric 
5122cab237bSDimitry Andric   if (native_sin && native_cos) {
5132cab237bSDimitry Andric     Module *M = aCI->getModule();
5142cab237bSDimitry Andric     Value *opr0 = aCI->getArgOperand(0);
5152cab237bSDimitry Andric 
5162cab237bSDimitry Andric     AMDGPULibFunc nf;
5172cab237bSDimitry Andric     nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
5182cab237bSDimitry Andric     nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
5192cab237bSDimitry Andric 
5202cab237bSDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
5212cab237bSDimitry Andric     nf.setId(AMDGPULibFunc::EI_SIN);
5222cab237bSDimitry Andric     Constant *sinExpr = getFunction(M, nf);
5232cab237bSDimitry Andric 
5242cab237bSDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
5252cab237bSDimitry Andric     nf.setId(AMDGPULibFunc::EI_COS);
5262cab237bSDimitry Andric     Constant *cosExpr = getFunction(M, nf);
5272cab237bSDimitry Andric     if (sinExpr && cosExpr) {
5282cab237bSDimitry Andric       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
5292cab237bSDimitry Andric       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
5302cab237bSDimitry Andric       new StoreInst(cosval, aCI->getArgOperand(1), aCI);
5312cab237bSDimitry Andric 
5322cab237bSDimitry Andric       DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5332cab237bSDimitry Andric                                           << " with native version of sin/cos");
5342cab237bSDimitry Andric 
5352cab237bSDimitry Andric       replaceCall(sinval);
5362cab237bSDimitry Andric       return true;
5372cab237bSDimitry Andric     }
5382cab237bSDimitry Andric   }
5392cab237bSDimitry Andric   return false;
5402cab237bSDimitry Andric }
5412cab237bSDimitry Andric 
useNative(CallInst * aCI)5422cab237bSDimitry Andric bool AMDGPULibCalls::useNative(CallInst *aCI) {
5432cab237bSDimitry Andric   CI = aCI;
5442cab237bSDimitry Andric   Function *Callee = aCI->getCalledFunction();
5452cab237bSDimitry Andric 
5462cab237bSDimitry Andric   FuncInfo FInfo;
5472cab237bSDimitry Andric   if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
5482cab237bSDimitry Andric       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
5492cab237bSDimitry Andric       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
5502cab237bSDimitry Andric       !(AllNative || useNativeFunc(FInfo.getName()))) {
5512cab237bSDimitry Andric     return false;
5522cab237bSDimitry Andric   }
5532cab237bSDimitry Andric 
5542cab237bSDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5552cab237bSDimitry Andric     return sincosUseNative(aCI, FInfo);
5562cab237bSDimitry Andric 
5572cab237bSDimitry Andric   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
5582cab237bSDimitry Andric   Constant *F = getFunction(aCI->getModule(), FInfo);
5592cab237bSDimitry Andric   if (!F)
5602cab237bSDimitry Andric     return false;
5612cab237bSDimitry Andric 
5622cab237bSDimitry Andric   aCI->setCalledFunction(F);
5632cab237bSDimitry Andric   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5642cab237bSDimitry Andric                                       << " with native version");
5652cab237bSDimitry Andric   return true;
5662cab237bSDimitry Andric }
5672cab237bSDimitry Andric 
5682cab237bSDimitry Andric // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
5692cab237bSDimitry Andric // builtin, with appended type size and alignment arguments, where 2 or 4
5702cab237bSDimitry Andric // indicates the original number of arguments. The library has optimized version
5712cab237bSDimitry Andric // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
5722cab237bSDimitry Andric // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
5732cab237bSDimitry Andric // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
5742cab237bSDimitry Andric // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
fold_read_write_pipe(CallInst * CI,IRBuilder<> & B,FuncInfo & FInfo)5752cab237bSDimitry Andric bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
5762cab237bSDimitry Andric                                           FuncInfo &FInfo) {
5772cab237bSDimitry Andric   auto *Callee = CI->getCalledFunction();
5782cab237bSDimitry Andric   if (!Callee->isDeclaration())
5792cab237bSDimitry Andric     return false;
5802cab237bSDimitry Andric 
5812cab237bSDimitry Andric   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
5822cab237bSDimitry Andric   auto *M = Callee->getParent();
5832cab237bSDimitry Andric   auto &Ctx = M->getContext();
5842cab237bSDimitry Andric   std::string Name = Callee->getName();
5852cab237bSDimitry Andric   auto NumArg = CI->getNumArgOperands();
5862cab237bSDimitry Andric   if (NumArg != 4 && NumArg != 6)
5872cab237bSDimitry Andric     return false;
5882cab237bSDimitry Andric   auto *PacketSize = CI->getArgOperand(NumArg - 2);
5892cab237bSDimitry Andric   auto *PacketAlign = CI->getArgOperand(NumArg - 1);
5902cab237bSDimitry Andric   if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
5912cab237bSDimitry Andric     return false;
5922cab237bSDimitry Andric   unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
5932cab237bSDimitry Andric   unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
5942cab237bSDimitry Andric   if (Size != Align || !isPowerOf2_32(Size))
5952cab237bSDimitry Andric     return false;
5962cab237bSDimitry Andric 
5972cab237bSDimitry Andric   Type *PtrElemTy;
5982cab237bSDimitry Andric   if (Size <= 8)
5992cab237bSDimitry Andric     PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
6002cab237bSDimitry Andric   else
6012cab237bSDimitry Andric     PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
6022cab237bSDimitry Andric   unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
6032cab237bSDimitry Andric   auto PtrArg = CI->getArgOperand(PtrArgLoc);
6042cab237bSDimitry Andric   unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
6052cab237bSDimitry Andric   auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
6062cab237bSDimitry Andric 
6072cab237bSDimitry Andric   SmallVector<llvm::Type *, 6> ArgTys;
6082cab237bSDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
6092cab237bSDimitry Andric     ArgTys.push_back(CI->getArgOperand(I)->getType());
6102cab237bSDimitry Andric   ArgTys.push_back(PtrTy);
6112cab237bSDimitry Andric 
6122cab237bSDimitry Andric   Name = Name + "_" + std::to_string(Size);
6132cab237bSDimitry Andric   auto *FTy = FunctionType::get(Callee->getReturnType(),
6142cab237bSDimitry Andric                                 ArrayRef<Type *>(ArgTys), false);
6152cab237bSDimitry Andric   AMDGPULibFunc NewLibFunc(Name, FTy);
6162cab237bSDimitry Andric   auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
6172cab237bSDimitry Andric   if (!F)
6182cab237bSDimitry Andric     return false;
6192cab237bSDimitry Andric 
6202cab237bSDimitry Andric   auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
6212cab237bSDimitry Andric   SmallVector<Value *, 6> Args;
6222cab237bSDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
6232cab237bSDimitry Andric     Args.push_back(CI->getArgOperand(I));
6242cab237bSDimitry Andric   Args.push_back(BCast);
6252cab237bSDimitry Andric 
6262cab237bSDimitry Andric   auto *NCI = B.CreateCall(F, Args);
6272cab237bSDimitry Andric   NCI->setAttributes(CI->getAttributes());
6282cab237bSDimitry Andric   CI->replaceAllUsesWith(NCI);
6292cab237bSDimitry Andric   CI->dropAllReferences();
6302cab237bSDimitry Andric   CI->eraseFromParent();
6312cab237bSDimitry Andric 
6322cab237bSDimitry Andric   return true;
6332cab237bSDimitry Andric }
6342cab237bSDimitry Andric 
6352cab237bSDimitry Andric // This function returns false if no change; return true otherwise.
fold(CallInst * CI,AliasAnalysis * AA)6362cab237bSDimitry Andric bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
6372cab237bSDimitry Andric   this->CI = CI;
6382cab237bSDimitry Andric   Function *Callee = CI->getCalledFunction();
6392cab237bSDimitry Andric 
6402cab237bSDimitry Andric   // Ignore indirect calls.
6412cab237bSDimitry Andric   if (Callee == 0) return false;
6422cab237bSDimitry Andric 
6432cab237bSDimitry Andric   FuncInfo FInfo;
6442cab237bSDimitry Andric   if (!parseFunctionName(Callee->getName(), &FInfo))
6452cab237bSDimitry Andric     return false;
6462cab237bSDimitry Andric 
6472cab237bSDimitry Andric   // Further check the number of arguments to see if they match.
6482cab237bSDimitry Andric   if (CI->getNumArgOperands() != FInfo.getNumArgs())
6492cab237bSDimitry Andric     return false;
6502cab237bSDimitry Andric 
6512cab237bSDimitry Andric   BasicBlock *BB = CI->getParent();
6522cab237bSDimitry Andric   LLVMContext &Context = CI->getParent()->getContext();
6532cab237bSDimitry Andric   IRBuilder<> B(Context);
6542cab237bSDimitry Andric 
6552cab237bSDimitry Andric   // Set the builder to the instruction after the call.
6562cab237bSDimitry Andric   B.SetInsertPoint(BB, CI->getIterator());
6572cab237bSDimitry Andric 
6582cab237bSDimitry Andric   // Copy fast flags from the original call.
6592cab237bSDimitry Andric   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
6602cab237bSDimitry Andric     B.setFastMathFlags(FPOp->getFastMathFlags());
6612cab237bSDimitry Andric 
6622cab237bSDimitry Andric   if (TDOFold(CI, FInfo))
6632cab237bSDimitry Andric     return true;
6642cab237bSDimitry Andric 
6652cab237bSDimitry Andric   // Under unsafe-math, evaluate calls if possible.
6662cab237bSDimitry Andric   // According to Brian Sumner, we can do this for all f32 function calls
6672cab237bSDimitry Andric   // using host's double function calls.
6682cab237bSDimitry Andric   if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
6692cab237bSDimitry Andric     return true;
6702cab237bSDimitry Andric 
6712cab237bSDimitry Andric   // Specilized optimizations for each function call
6722cab237bSDimitry Andric   switch (FInfo.getId()) {
6732cab237bSDimitry Andric   case AMDGPULibFunc::EI_RECIP:
6742cab237bSDimitry Andric     // skip vector function
6752cab237bSDimitry Andric     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6762cab237bSDimitry Andric              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6772cab237bSDimitry Andric             "recip must be an either native or half function");
6782cab237bSDimitry Andric     return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
6792cab237bSDimitry Andric 
6802cab237bSDimitry Andric   case AMDGPULibFunc::EI_DIVIDE:
6812cab237bSDimitry Andric     // skip vector function
6822cab237bSDimitry Andric     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6832cab237bSDimitry Andric              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6842cab237bSDimitry Andric             "divide must be an either native or half function");
6852cab237bSDimitry Andric     return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
6862cab237bSDimitry Andric 
6872cab237bSDimitry Andric   case AMDGPULibFunc::EI_POW:
6882cab237bSDimitry Andric   case AMDGPULibFunc::EI_POWR:
6892cab237bSDimitry Andric   case AMDGPULibFunc::EI_POWN:
6902cab237bSDimitry Andric     return fold_pow(CI, B, FInfo);
6912cab237bSDimitry Andric 
6922cab237bSDimitry Andric   case AMDGPULibFunc::EI_ROOTN:
6932cab237bSDimitry Andric     // skip vector function
6942cab237bSDimitry Andric     return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
6952cab237bSDimitry Andric 
6962cab237bSDimitry Andric   case AMDGPULibFunc::EI_FMA:
6972cab237bSDimitry Andric   case AMDGPULibFunc::EI_MAD:
6982cab237bSDimitry Andric   case AMDGPULibFunc::EI_NFMA:
6992cab237bSDimitry Andric     // skip vector function
7002cab237bSDimitry Andric     return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
7012cab237bSDimitry Andric 
7022cab237bSDimitry Andric   case AMDGPULibFunc::EI_SQRT:
7032cab237bSDimitry Andric     return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
7042cab237bSDimitry Andric   case AMDGPULibFunc::EI_COS:
7052cab237bSDimitry Andric   case AMDGPULibFunc::EI_SIN:
7062cab237bSDimitry Andric     if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
7072cab237bSDimitry Andric          getArgType(FInfo) == AMDGPULibFunc::F64)
7082cab237bSDimitry Andric         && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
7092cab237bSDimitry Andric       return fold_sincos(CI, B, AA);
7102cab237bSDimitry Andric 
7112cab237bSDimitry Andric     break;
7122cab237bSDimitry Andric   case AMDGPULibFunc::EI_READ_PIPE_2:
7132cab237bSDimitry Andric   case AMDGPULibFunc::EI_READ_PIPE_4:
7142cab237bSDimitry Andric   case AMDGPULibFunc::EI_WRITE_PIPE_2:
7152cab237bSDimitry Andric   case AMDGPULibFunc::EI_WRITE_PIPE_4:
7162cab237bSDimitry Andric     return fold_read_write_pipe(CI, B, FInfo);
7172cab237bSDimitry Andric 
7182cab237bSDimitry Andric   default:
7192cab237bSDimitry Andric     break;
7202cab237bSDimitry Andric   }
7212cab237bSDimitry Andric 
7222cab237bSDimitry Andric   return false;
7232cab237bSDimitry Andric }
7242cab237bSDimitry Andric 
TDOFold(CallInst * CI,const FuncInfo & FInfo)7252cab237bSDimitry Andric bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
7262cab237bSDimitry Andric   // Table-Driven optimization
7272cab237bSDimitry Andric   const TableRef tr = getOptTable(FInfo.getId());
7282cab237bSDimitry Andric   if (tr.size==0)
7292cab237bSDimitry Andric     return false;
7302cab237bSDimitry Andric 
7312cab237bSDimitry Andric   int const sz = (int)tr.size;
7322cab237bSDimitry Andric   const TableEntry * const ftbl = tr.table;
7332cab237bSDimitry Andric   Value *opr0 = CI->getArgOperand(0);
7342cab237bSDimitry Andric 
7352cab237bSDimitry Andric   if (getVecSize(FInfo) > 1) {
7362cab237bSDimitry Andric     if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
7372cab237bSDimitry Andric       SmallVector<double, 0> DVal;
7382cab237bSDimitry Andric       for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
7392cab237bSDimitry Andric         ConstantFP *eltval = dyn_cast<ConstantFP>(
7402cab237bSDimitry Andric                                CV->getElementAsConstant((unsigned)eltNo));
7412cab237bSDimitry Andric         assert(eltval && "Non-FP arguments in math function!");
7422cab237bSDimitry Andric         bool found = false;
7432cab237bSDimitry Andric         for (int i=0; i < sz; ++i) {
7442cab237bSDimitry Andric           if (eltval->isExactlyValue(ftbl[i].input)) {
7452cab237bSDimitry Andric             DVal.push_back(ftbl[i].result);
7462cab237bSDimitry Andric             found = true;
7472cab237bSDimitry Andric             break;
7482cab237bSDimitry Andric           }
7492cab237bSDimitry Andric         }
7502cab237bSDimitry Andric         if (!found) {
7512cab237bSDimitry Andric           // This vector constants not handled yet.
7522cab237bSDimitry Andric           return false;
7532cab237bSDimitry Andric         }
7542cab237bSDimitry Andric       }
7552cab237bSDimitry Andric       LLVMContext &context = CI->getParent()->getParent()->getContext();
7562cab237bSDimitry Andric       Constant *nval;
7572cab237bSDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
7582cab237bSDimitry Andric         SmallVector<float, 0> FVal;
7592cab237bSDimitry Andric         for (unsigned i = 0; i < DVal.size(); ++i) {
7602cab237bSDimitry Andric           FVal.push_back((float)DVal[i]);
7612cab237bSDimitry Andric         }
7622cab237bSDimitry Andric         ArrayRef<float> tmp(FVal);
7632cab237bSDimitry Andric         nval = ConstantDataVector::get(context, tmp);
7642cab237bSDimitry Andric       } else { // F64
7652cab237bSDimitry Andric         ArrayRef<double> tmp(DVal);
7662cab237bSDimitry Andric         nval = ConstantDataVector::get(context, tmp);
7672cab237bSDimitry Andric       }
7684ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7692cab237bSDimitry Andric       replaceCall(nval);
7702cab237bSDimitry Andric       return true;
7712cab237bSDimitry Andric     }
7722cab237bSDimitry Andric   } else {
7732cab237bSDimitry Andric     // Scalar version
7742cab237bSDimitry Andric     if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
7752cab237bSDimitry Andric       for (int i = 0; i < sz; ++i) {
7762cab237bSDimitry Andric         if (CF->isExactlyValue(ftbl[i].input)) {
7772cab237bSDimitry Andric           Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
7784ba319b5SDimitry Andric           LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7792cab237bSDimitry Andric           replaceCall(nval);
7802cab237bSDimitry Andric           return true;
7812cab237bSDimitry Andric         }
7822cab237bSDimitry Andric       }
7832cab237bSDimitry Andric     }
7842cab237bSDimitry Andric   }
7852cab237bSDimitry Andric 
7862cab237bSDimitry Andric   return false;
7872cab237bSDimitry Andric }
7882cab237bSDimitry Andric 
replaceWithNative(CallInst * CI,const FuncInfo & FInfo)7892cab237bSDimitry Andric bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
7902cab237bSDimitry Andric   Module *M = CI->getModule();
7912cab237bSDimitry Andric   if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
7922cab237bSDimitry Andric       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
7932cab237bSDimitry Andric       !HasNative(FInfo.getId()))
7942cab237bSDimitry Andric     return false;
7952cab237bSDimitry Andric 
7962cab237bSDimitry Andric   AMDGPULibFunc nf = FInfo;
7972cab237bSDimitry Andric   nf.setPrefix(AMDGPULibFunc::NATIVE);
7982cab237bSDimitry Andric   if (Constant *FPExpr = getFunction(M, nf)) {
7994ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
8002cab237bSDimitry Andric 
8012cab237bSDimitry Andric     CI->setCalledFunction(FPExpr);
8022cab237bSDimitry Andric 
8034ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << *CI << '\n');
8042cab237bSDimitry Andric 
8052cab237bSDimitry Andric     return true;
8062cab237bSDimitry Andric   }
8072cab237bSDimitry Andric   return false;
8082cab237bSDimitry Andric }
8092cab237bSDimitry Andric 
8102cab237bSDimitry Andric //  [native_]half_recip(c) ==> 1.0/c
fold_recip(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8112cab237bSDimitry Andric bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
8122cab237bSDimitry Andric                                 const FuncInfo &FInfo) {
8132cab237bSDimitry Andric   Value *opr0 = CI->getArgOperand(0);
8142cab237bSDimitry Andric   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
8152cab237bSDimitry Andric     // Just create a normal div. Later, InstCombine will be able
8162cab237bSDimitry Andric     // to compute the divide into a constant (avoid check float infinity
8172cab237bSDimitry Andric     // or subnormal at this point).
8182cab237bSDimitry Andric     Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
8192cab237bSDimitry Andric                                opr0,
8202cab237bSDimitry Andric                                "recip2div");
8214ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8222cab237bSDimitry Andric     replaceCall(nval);
8232cab237bSDimitry Andric     return true;
8242cab237bSDimitry Andric   }
8252cab237bSDimitry Andric   return false;
8262cab237bSDimitry Andric }
8272cab237bSDimitry Andric 
8282cab237bSDimitry Andric //  [native_]half_divide(x, c) ==> x/c
fold_divide(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8292cab237bSDimitry Andric bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
8302cab237bSDimitry Andric                                  const FuncInfo &FInfo) {
8312cab237bSDimitry Andric   Value *opr0 = CI->getArgOperand(0);
8322cab237bSDimitry Andric   Value *opr1 = CI->getArgOperand(1);
8332cab237bSDimitry Andric   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
8342cab237bSDimitry Andric   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
8352cab237bSDimitry Andric 
8362cab237bSDimitry Andric   if ((CF0 && CF1) ||  // both are constants
8372cab237bSDimitry Andric       (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
8382cab237bSDimitry Andric       // CF1 is constant && f32 divide
8392cab237bSDimitry Andric   {
8402cab237bSDimitry Andric     Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
8412cab237bSDimitry Andric                                 opr1, "__div2recip");
8422cab237bSDimitry Andric     Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
8432cab237bSDimitry Andric     replaceCall(nval);
8442cab237bSDimitry Andric     return true;
8452cab237bSDimitry Andric   }
8462cab237bSDimitry Andric   return false;
8472cab237bSDimitry Andric }
8482cab237bSDimitry Andric 
namespace llvm {
// Host-side base-2 logarithm of V.
//
// Uses the C library's log2() when the feature-test macros indicate that it
// is declared, and otherwise falls back to ln(V) / ln(2).
static double log2(double V) {
// _ISOC99_SOURCE may be defined with an empty value (plain
// "#define _ISOC99_SOURCE"), so it has to be probed with defined(); using it
// as an operand would make the #if expression ill-formed in that case.
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) ||                         \
    _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  // 0.693147180559945309417 is ln(2).
  return log(V) / 0.693147180559945309417;
#endif
}
}
8582cab237bSDimitry Andric 
fold_pow(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8592cab237bSDimitry Andric bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
8602cab237bSDimitry Andric                               const FuncInfo &FInfo) {
8612cab237bSDimitry Andric   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
8622cab237bSDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
8632cab237bSDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
8642cab237bSDimitry Andric          "fold_pow: encounter a wrong function call");
8652cab237bSDimitry Andric 
8662cab237bSDimitry Andric   Value *opr0, *opr1;
8672cab237bSDimitry Andric   ConstantFP *CF;
8682cab237bSDimitry Andric   ConstantInt *CINT;
8692cab237bSDimitry Andric   ConstantAggregateZero *CZero;
8702cab237bSDimitry Andric   Type *eltType;
8712cab237bSDimitry Andric 
8722cab237bSDimitry Andric   opr0 = CI->getArgOperand(0);
8732cab237bSDimitry Andric   opr1 = CI->getArgOperand(1);
8742cab237bSDimitry Andric   CZero = dyn_cast<ConstantAggregateZero>(opr1);
8752cab237bSDimitry Andric   if (getVecSize(FInfo) == 1) {
8762cab237bSDimitry Andric     eltType = opr0->getType();
8772cab237bSDimitry Andric     CF = dyn_cast<ConstantFP>(opr1);
8782cab237bSDimitry Andric     CINT = dyn_cast<ConstantInt>(opr1);
8792cab237bSDimitry Andric   } else {
8802cab237bSDimitry Andric     VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
8812cab237bSDimitry Andric     assert(VTy && "Oprand of vector function should be of vectortype");
8822cab237bSDimitry Andric     eltType = VTy->getElementType();
8832cab237bSDimitry Andric     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
8842cab237bSDimitry Andric 
8852cab237bSDimitry Andric     // Now, only Handle vector const whose elements have the same value.
8862cab237bSDimitry Andric     CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
8872cab237bSDimitry Andric     CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
8882cab237bSDimitry Andric   }
8892cab237bSDimitry Andric 
8902cab237bSDimitry Andric   // No unsafe math , no constant argument, do nothing
8912cab237bSDimitry Andric   if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
8922cab237bSDimitry Andric     return false;
8932cab237bSDimitry Andric 
8942cab237bSDimitry Andric   // 0x1111111 means that we don't do anything for this call.
8952cab237bSDimitry Andric   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
8962cab237bSDimitry Andric 
8972cab237bSDimitry Andric   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
8982cab237bSDimitry Andric     //  pow/powr/pown(x, 0) == 1
8994ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
9002cab237bSDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9012cab237bSDimitry Andric     if (getVecSize(FInfo) > 1) {
9022cab237bSDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9032cab237bSDimitry Andric     }
9042cab237bSDimitry Andric     replaceCall(cnval);
9052cab237bSDimitry Andric     return true;
9062cab237bSDimitry Andric   }
9072cab237bSDimitry Andric   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
9082cab237bSDimitry Andric     // pow/powr/pown(x, 1.0) = x
9094ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
9102cab237bSDimitry Andric     replaceCall(opr0);
9112cab237bSDimitry Andric     return true;
9122cab237bSDimitry Andric   }
9132cab237bSDimitry Andric   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
9142cab237bSDimitry Andric     // pow/powr/pown(x, 2.0) = x*x
9154ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
9164ba319b5SDimitry Andric                       << "\n");
9172cab237bSDimitry Andric     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
9182cab237bSDimitry Andric     replaceCall(nval);
9192cab237bSDimitry Andric     return true;
9202cab237bSDimitry Andric   }
9212cab237bSDimitry Andric   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
9222cab237bSDimitry Andric     // pow/powr/pown(x, -1.0) = 1.0/x
9234ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
9242cab237bSDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9252cab237bSDimitry Andric     if (getVecSize(FInfo) > 1) {
9262cab237bSDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9272cab237bSDimitry Andric     }
9282cab237bSDimitry Andric     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
9292cab237bSDimitry Andric     replaceCall(nval);
9302cab237bSDimitry Andric     return true;
9312cab237bSDimitry Andric   }
9322cab237bSDimitry Andric 
9332cab237bSDimitry Andric   Module *M = CI->getModule();
9342cab237bSDimitry Andric   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
9352cab237bSDimitry Andric     // pow[r](x, [-]0.5) = sqrt(x)
9362cab237bSDimitry Andric     bool issqrt = CF->isExactlyValue(0.5);
9372cab237bSDimitry Andric     if (Constant *FPExpr = getFunction(M,
9382cab237bSDimitry Andric         AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
9392cab237bSDimitry Andric                              : AMDGPULibFunc::EI_RSQRT, FInfo))) {
9404ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
9412cab237bSDimitry Andric                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
9422cab237bSDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
9432cab237bSDimitry Andric                                                         : "__pow2rsqrt");
9442cab237bSDimitry Andric       replaceCall(nval);
9452cab237bSDimitry Andric       return true;
9462cab237bSDimitry Andric     }
9472cab237bSDimitry Andric   }
9482cab237bSDimitry Andric 
9492cab237bSDimitry Andric   if (!isUnsafeMath(CI))
9502cab237bSDimitry Andric     return false;
9512cab237bSDimitry Andric 
9522cab237bSDimitry Andric   // Unsafe Math optimization
9532cab237bSDimitry Andric 
9542cab237bSDimitry Andric   // Remember that ci_opr1 is set if opr1 is integral
9552cab237bSDimitry Andric   if (CF) {
9562cab237bSDimitry Andric     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9572cab237bSDimitry Andric                     ? (double)CF->getValueAPF().convertToFloat()
9582cab237bSDimitry Andric                     : CF->getValueAPF().convertToDouble();
9592cab237bSDimitry Andric     int ival = (int)dval;
9602cab237bSDimitry Andric     if ((double)ival == dval) {
9612cab237bSDimitry Andric       ci_opr1 = ival;
9622cab237bSDimitry Andric     } else
9632cab237bSDimitry Andric       ci_opr1 = 0x11111111;
9642cab237bSDimitry Andric   }
9652cab237bSDimitry Andric 
9662cab237bSDimitry Andric   // pow/powr/pown(x, c) = [1/](x*x*..x); where
9672cab237bSDimitry Andric   //   trunc(c) == c && the number of x == c && |c| <= 12
9682cab237bSDimitry Andric   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9692cab237bSDimitry Andric   if (abs_opr1 <= 12) {
9702cab237bSDimitry Andric     Constant *cnval;
9712cab237bSDimitry Andric     Value *nval;
9722cab237bSDimitry Andric     if (abs_opr1 == 0) {
9732cab237bSDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
9742cab237bSDimitry Andric       if (getVecSize(FInfo) > 1) {
9752cab237bSDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9762cab237bSDimitry Andric       }
9772cab237bSDimitry Andric       nval = cnval;
9782cab237bSDimitry Andric     } else {
9792cab237bSDimitry Andric       Value *valx2 = nullptr;
9802cab237bSDimitry Andric       nval = nullptr;
9812cab237bSDimitry Andric       while (abs_opr1 > 0) {
9822cab237bSDimitry Andric         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
9832cab237bSDimitry Andric         if (abs_opr1 & 1) {
9842cab237bSDimitry Andric           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
9852cab237bSDimitry Andric         }
9862cab237bSDimitry Andric         abs_opr1 >>= 1;
9872cab237bSDimitry Andric       }
9882cab237bSDimitry Andric     }
9892cab237bSDimitry Andric 
9902cab237bSDimitry Andric     if (ci_opr1 < 0) {
9912cab237bSDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
9922cab237bSDimitry Andric       if (getVecSize(FInfo) > 1) {
9932cab237bSDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9942cab237bSDimitry Andric       }
9952cab237bSDimitry Andric       nval = B.CreateFDiv(cnval, nval, "__1powprod");
9962cab237bSDimitry Andric     }
9974ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
9984ba319b5SDimitry Andric                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
9994ba319b5SDimitry Andric                       << ")\n");
10002cab237bSDimitry Andric     replaceCall(nval);
10012cab237bSDimitry Andric     return true;
10022cab237bSDimitry Andric   }
10032cab237bSDimitry Andric 
10042cab237bSDimitry Andric   // powr ---> exp2(y * log2(x))
10052cab237bSDimitry Andric   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
10062cab237bSDimitry Andric   Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
10072cab237bSDimitry Andric                                                    FInfo));
10082cab237bSDimitry Andric   if (!ExpExpr)
10092cab237bSDimitry Andric     return false;
10102cab237bSDimitry Andric 
10112cab237bSDimitry Andric   bool needlog = false;
10122cab237bSDimitry Andric   bool needabs = false;
10132cab237bSDimitry Andric   bool needcopysign = false;
10142cab237bSDimitry Andric   Constant *cnval = nullptr;
10152cab237bSDimitry Andric   if (getVecSize(FInfo) == 1) {
10162cab237bSDimitry Andric     CF = dyn_cast<ConstantFP>(opr0);
10172cab237bSDimitry Andric 
10182cab237bSDimitry Andric     if (CF) {
10192cab237bSDimitry Andric       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10202cab237bSDimitry Andric                    ? (double)CF->getValueAPF().convertToFloat()
10212cab237bSDimitry Andric                    : CF->getValueAPF().convertToDouble();
10222cab237bSDimitry Andric 
10232cab237bSDimitry Andric       V = log2(std::abs(V));
10242cab237bSDimitry Andric       cnval = ConstantFP::get(eltType, V);
10252cab237bSDimitry Andric       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
10262cab237bSDimitry Andric                      CF->isNegative();
10272cab237bSDimitry Andric     } else {
10282cab237bSDimitry Andric       needlog = true;
10292cab237bSDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
10302cab237bSDimitry Andric                                (!CF || CF->isNegative());
10312cab237bSDimitry Andric     }
10322cab237bSDimitry Andric   } else {
10332cab237bSDimitry Andric     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
10342cab237bSDimitry Andric 
10352cab237bSDimitry Andric     if (!CDV) {
10362cab237bSDimitry Andric       needlog = true;
10372cab237bSDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10382cab237bSDimitry Andric     } else {
10392cab237bSDimitry Andric       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
10402cab237bSDimitry Andric               "Wrong vector size detected");
10412cab237bSDimitry Andric 
10422cab237bSDimitry Andric       SmallVector<double, 0> DVal;
10432cab237bSDimitry Andric       for (int i=0; i < getVecSize(FInfo); ++i) {
10442cab237bSDimitry Andric         double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10452cab237bSDimitry Andric                      ? (double)CDV->getElementAsFloat(i)
10462cab237bSDimitry Andric                      : CDV->getElementAsDouble(i);
10472cab237bSDimitry Andric         if (V < 0.0) needcopysign = true;
10482cab237bSDimitry Andric         V = log2(std::abs(V));
10492cab237bSDimitry Andric         DVal.push_back(V);
10502cab237bSDimitry Andric       }
10512cab237bSDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
10522cab237bSDimitry Andric         SmallVector<float, 0> FVal;
10532cab237bSDimitry Andric         for (unsigned i=0; i < DVal.size(); ++i) {
10542cab237bSDimitry Andric           FVal.push_back((float)DVal[i]);
10552cab237bSDimitry Andric         }
10562cab237bSDimitry Andric         ArrayRef<float> tmp(FVal);
10572cab237bSDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10582cab237bSDimitry Andric       } else {
10592cab237bSDimitry Andric         ArrayRef<double> tmp(DVal);
10602cab237bSDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10612cab237bSDimitry Andric       }
10622cab237bSDimitry Andric     }
10632cab237bSDimitry Andric   }
10642cab237bSDimitry Andric 
10652cab237bSDimitry Andric   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10662cab237bSDimitry Andric     // We cannot handle corner cases for a general pow() function, give up
10672cab237bSDimitry Andric     // unless y is a constant integral value. Then proceed as if it were pown.
10682cab237bSDimitry Andric     if (getVecSize(FInfo) == 1) {
10692cab237bSDimitry Andric       if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
10702cab237bSDimitry Andric         double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10712cab237bSDimitry Andric                    ? (double)CF->getValueAPF().convertToFloat()
10722cab237bSDimitry Andric                    : CF->getValueAPF().convertToDouble();
10732cab237bSDimitry Andric         if (y != (double)(int64_t)y)
10742cab237bSDimitry Andric           return false;
10752cab237bSDimitry Andric       } else
10762cab237bSDimitry Andric         return false;
10772cab237bSDimitry Andric     } else {
10782cab237bSDimitry Andric       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
10792cab237bSDimitry Andric         for (int i=0; i < getVecSize(FInfo); ++i) {
10802cab237bSDimitry Andric           double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10812cab237bSDimitry Andric                      ? (double)CDV->getElementAsFloat(i)
10822cab237bSDimitry Andric                      : CDV->getElementAsDouble(i);
10832cab237bSDimitry Andric           if (y != (double)(int64_t)y)
10842cab237bSDimitry Andric             return false;
10852cab237bSDimitry Andric         }
10862cab237bSDimitry Andric       } else
10872cab237bSDimitry Andric         return false;
10882cab237bSDimitry Andric     }
10892cab237bSDimitry Andric   }
10902cab237bSDimitry Andric 
10912cab237bSDimitry Andric   Value *nval;
10922cab237bSDimitry Andric   if (needabs) {
10932cab237bSDimitry Andric     Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
10942cab237bSDimitry Andric                                                      FInfo));
10952cab237bSDimitry Andric     if (!AbsExpr)
10962cab237bSDimitry Andric       return false;
10972cab237bSDimitry Andric     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
10982cab237bSDimitry Andric   } else {
10992cab237bSDimitry Andric     nval = cnval ? cnval : opr0;
11002cab237bSDimitry Andric   }
11012cab237bSDimitry Andric   if (needlog) {
11022cab237bSDimitry Andric     Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
11032cab237bSDimitry Andric                                                      FInfo));
11042cab237bSDimitry Andric     if (!LogExpr)
11052cab237bSDimitry Andric       return false;
11062cab237bSDimitry Andric     nval = CreateCallEx(B,LogExpr, nval, "__log2");
11072cab237bSDimitry Andric   }
11082cab237bSDimitry Andric 
11092cab237bSDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
11102cab237bSDimitry Andric     // convert int(32) to fp(f32 or f64)
11112cab237bSDimitry Andric     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
11122cab237bSDimitry Andric   }
11132cab237bSDimitry Andric   nval = B.CreateFMul(opr1, nval, "__ylogx");
11142cab237bSDimitry Andric   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11152cab237bSDimitry Andric 
11162cab237bSDimitry Andric   if (needcopysign) {
11172cab237bSDimitry Andric     Value *opr_n;
11182cab237bSDimitry Andric     Type* rTy = opr0->getType();
11192cab237bSDimitry Andric     Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
11202cab237bSDimitry Andric     Type *nTy = nTyS;
11212cab237bSDimitry Andric     if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
11222cab237bSDimitry Andric       nTy = VectorType::get(nTyS, vTy->getNumElements());
11232cab237bSDimitry Andric     unsigned size = nTy->getScalarSizeInBits();
11242cab237bSDimitry Andric     opr_n = CI->getArgOperand(1);
11252cab237bSDimitry Andric     if (opr_n->getType()->isIntegerTy())
11262cab237bSDimitry Andric       opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
11272cab237bSDimitry Andric     else
11282cab237bSDimitry Andric       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11292cab237bSDimitry Andric 
11302cab237bSDimitry Andric     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11312cab237bSDimitry Andric     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
11322cab237bSDimitry Andric     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
11332cab237bSDimitry Andric     nval = B.CreateBitCast(nval, opr0->getType());
11342cab237bSDimitry Andric   }
11352cab237bSDimitry Andric 
11364ba319b5SDimitry Andric   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
11372cab237bSDimitry Andric                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
11382cab237bSDimitry Andric   replaceCall(nval);
11392cab237bSDimitry Andric 
11402cab237bSDimitry Andric   return true;
11412cab237bSDimitry Andric }
11422cab237bSDimitry Andric 
fold_rootn(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)11432cab237bSDimitry Andric bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
11442cab237bSDimitry Andric                                 const FuncInfo &FInfo) {
11452cab237bSDimitry Andric   Value *opr0 = CI->getArgOperand(0);
11462cab237bSDimitry Andric   Value *opr1 = CI->getArgOperand(1);
11472cab237bSDimitry Andric 
11482cab237bSDimitry Andric   ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
11492cab237bSDimitry Andric   if (!CINT) {
11502cab237bSDimitry Andric     return false;
11512cab237bSDimitry Andric   }
11522cab237bSDimitry Andric   int ci_opr1 = (int)CINT->getSExtValue();
11532cab237bSDimitry Andric   if (ci_opr1 == 1) {  // rootn(x, 1) = x
11544ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
11552cab237bSDimitry Andric     replaceCall(opr0);
11562cab237bSDimitry Andric     return true;
11572cab237bSDimitry Andric   }
11582cab237bSDimitry Andric   if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
11592cab237bSDimitry Andric     std::vector<const Type*> ParamsTys;
11602cab237bSDimitry Andric     ParamsTys.push_back(opr0->getType());
11612cab237bSDimitry Andric     Module *M = CI->getModule();
11622cab237bSDimitry Andric     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
11632cab237bSDimitry Andric                                                         FInfo))) {
11644ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
11652cab237bSDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
11662cab237bSDimitry Andric       replaceCall(nval);
11672cab237bSDimitry Andric       return true;
11682cab237bSDimitry Andric     }
11692cab237bSDimitry Andric   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11702cab237bSDimitry Andric     Module *M = CI->getModule();
11712cab237bSDimitry Andric     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
11722cab237bSDimitry Andric                                                         FInfo))) {
11734ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
11742cab237bSDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
11752cab237bSDimitry Andric       replaceCall(nval);
11762cab237bSDimitry Andric       return true;
11772cab237bSDimitry Andric     }
11782cab237bSDimitry Andric   } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
11794ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
11802cab237bSDimitry Andric     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
11812cab237bSDimitry Andric                                opr0,
11822cab237bSDimitry Andric                                "__rootn2div");
11832cab237bSDimitry Andric     replaceCall(nval);
11842cab237bSDimitry Andric     return true;
11852cab237bSDimitry Andric   } else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
11862cab237bSDimitry Andric     std::vector<const Type*> ParamsTys;
11872cab237bSDimitry Andric     ParamsTys.push_back(opr0->getType());
11882cab237bSDimitry Andric     Module *M = CI->getModule();
11892cab237bSDimitry Andric     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
11902cab237bSDimitry Andric                                                         FInfo))) {
11914ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
11924ba319b5SDimitry Andric                         << ")\n");
11932cab237bSDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
11942cab237bSDimitry Andric       replaceCall(nval);
11952cab237bSDimitry Andric       return true;
11962cab237bSDimitry Andric     }
11972cab237bSDimitry Andric   }
11982cab237bSDimitry Andric   return false;
11992cab237bSDimitry Andric }
12002cab237bSDimitry Andric 
fold_fma_mad(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)12012cab237bSDimitry Andric bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
12022cab237bSDimitry Andric                                   const FuncInfo &FInfo) {
12032cab237bSDimitry Andric   Value *opr0 = CI->getArgOperand(0);
12042cab237bSDimitry Andric   Value *opr1 = CI->getArgOperand(1);
12052cab237bSDimitry Andric   Value *opr2 = CI->getArgOperand(2);
12062cab237bSDimitry Andric 
12072cab237bSDimitry Andric   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
12082cab237bSDimitry Andric   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
12092cab237bSDimitry Andric   if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
12102cab237bSDimitry Andric     // fma/mad(a, b, c) = c if a=0 || b=0
12114ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
12122cab237bSDimitry Andric     replaceCall(opr2);
12132cab237bSDimitry Andric     return true;
12142cab237bSDimitry Andric   }
12152cab237bSDimitry Andric   if (CF0 && CF0->isExactlyValue(1.0f)) {
12162cab237bSDimitry Andric     // fma/mad(a, b, c) = b+c if a=1
12174ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
12184ba319b5SDimitry Andric                       << "\n");
12192cab237bSDimitry Andric     Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
12202cab237bSDimitry Andric     replaceCall(nval);
12212cab237bSDimitry Andric     return true;
12222cab237bSDimitry Andric   }
12232cab237bSDimitry Andric   if (CF1 && CF1->isExactlyValue(1.0f)) {
12242cab237bSDimitry Andric     // fma/mad(a, b, c) = a+c if b=1
12254ba319b5SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
12264ba319b5SDimitry Andric                       << "\n");
12272cab237bSDimitry Andric     Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
12282cab237bSDimitry Andric     replaceCall(nval);
12292cab237bSDimitry Andric     return true;
12302cab237bSDimitry Andric   }
12312cab237bSDimitry Andric   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
12322cab237bSDimitry Andric     if (CF->isZero()) {
12332cab237bSDimitry Andric       // fma/mad(a, b, c) = a*b if c=0
12344ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
12354ba319b5SDimitry Andric                         << *opr1 << "\n");
12362cab237bSDimitry Andric       Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
12372cab237bSDimitry Andric       replaceCall(nval);
12382cab237bSDimitry Andric       return true;
12392cab237bSDimitry Andric     }
12402cab237bSDimitry Andric   }
12412cab237bSDimitry Andric 
12422cab237bSDimitry Andric   return false;
12432cab237bSDimitry Andric }
12442cab237bSDimitry Andric 
12452cab237bSDimitry Andric // Get a scalar native builtin signle argument FP function
getNativeFunction(Module * M,const FuncInfo & FInfo)12462cab237bSDimitry Andric Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
12472cab237bSDimitry Andric   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
12482cab237bSDimitry Andric     return nullptr;
12492cab237bSDimitry Andric   FuncInfo nf = FInfo;
12502cab237bSDimitry Andric   nf.setPrefix(AMDGPULibFunc::NATIVE);
12512cab237bSDimitry Andric   return getFunction(M, nf);
12522cab237bSDimitry Andric }
12532cab237bSDimitry Andric 
12542cab237bSDimitry Andric // fold sqrt -> native_sqrt (x)
fold_sqrt(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)12552cab237bSDimitry Andric bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
12562cab237bSDimitry Andric                                const FuncInfo &FInfo) {
12572cab237bSDimitry Andric   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
12582cab237bSDimitry Andric       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
12592cab237bSDimitry Andric     if (Constant *FPExpr = getNativeFunction(
12602cab237bSDimitry Andric         CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
12612cab237bSDimitry Andric       Value *opr0 = CI->getArgOperand(0);
12624ba319b5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
12632cab237bSDimitry Andric                         << "sqrt(" << *opr0 << ")\n");
12642cab237bSDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
12652cab237bSDimitry Andric       replaceCall(nval);
12662cab237bSDimitry Andric       return true;
12672cab237bSDimitry Andric     }
12682cab237bSDimitry Andric   }
12692cab237bSDimitry Andric   return false;
12702cab237bSDimitry Andric }
12712cab237bSDimitry Andric 
12722cab237bSDimitry Andric // fold sin, cos -> sincos.
fold_sincos(CallInst * CI,IRBuilder<> & B,AliasAnalysis * AA)12732cab237bSDimitry Andric bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
12742cab237bSDimitry Andric                                  AliasAnalysis *AA) {
12752cab237bSDimitry Andric   AMDGPULibFunc fInfo;
12762cab237bSDimitry Andric   if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
12772cab237bSDimitry Andric     return false;
12782cab237bSDimitry Andric 
12792cab237bSDimitry Andric   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
12802cab237bSDimitry Andric          fInfo.getId() == AMDGPULibFunc::EI_COS);
12812cab237bSDimitry Andric   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
12822cab237bSDimitry Andric 
12832cab237bSDimitry Andric   Value *CArgVal = CI->getArgOperand(0);
12842cab237bSDimitry Andric   BasicBlock * const CBB = CI->getParent();
12852cab237bSDimitry Andric 
12862cab237bSDimitry Andric   int const MaxScan = 30;
12872cab237bSDimitry Andric 
12882cab237bSDimitry Andric   { // fold in load value.
12892cab237bSDimitry Andric     LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
12902cab237bSDimitry Andric     if (LI && LI->getParent() == CBB) {
12912cab237bSDimitry Andric       BasicBlock::iterator BBI = LI->getIterator();
12922cab237bSDimitry Andric       Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
12932cab237bSDimitry Andric       if (AvailableVal) {
12942cab237bSDimitry Andric         CArgVal->replaceAllUsesWith(AvailableVal);
12952cab237bSDimitry Andric         if (CArgVal->getNumUses() == 0)
12962cab237bSDimitry Andric           LI->eraseFromParent();
12972cab237bSDimitry Andric         CArgVal = CI->getArgOperand(0);
12982cab237bSDimitry Andric       }
12992cab237bSDimitry Andric     }
13002cab237bSDimitry Andric   }
13012cab237bSDimitry Andric 
13022cab237bSDimitry Andric   Module *M = CI->getModule();
13032cab237bSDimitry Andric   fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
13042cab237bSDimitry Andric   std::string const PairName = fInfo.mangle();
13052cab237bSDimitry Andric 
13062cab237bSDimitry Andric   CallInst *UI = nullptr;
13072cab237bSDimitry Andric   for (User* U : CArgVal->users()) {
13082cab237bSDimitry Andric     CallInst *XI = dyn_cast_or_null<CallInst>(U);
13092cab237bSDimitry Andric     if (!XI || XI == CI || XI->getParent() != CBB)
13102cab237bSDimitry Andric       continue;
13112cab237bSDimitry Andric 
13122cab237bSDimitry Andric     Function *UCallee = XI->getCalledFunction();
13132cab237bSDimitry Andric     if (!UCallee || !UCallee->getName().equals(PairName))
13142cab237bSDimitry Andric       continue;
13152cab237bSDimitry Andric 
13162cab237bSDimitry Andric     BasicBlock::iterator BBI = CI->getIterator();
13172cab237bSDimitry Andric     if (BBI == CI->getParent()->begin())
13182cab237bSDimitry Andric       break;
13192cab237bSDimitry Andric     --BBI;
13202cab237bSDimitry Andric     for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
13212cab237bSDimitry Andric       if (cast<Instruction>(BBI) == XI) {
13222cab237bSDimitry Andric         UI = XI;
13232cab237bSDimitry Andric         break;
13242cab237bSDimitry Andric       }
13252cab237bSDimitry Andric     }
13262cab237bSDimitry Andric     if (UI) break;
13272cab237bSDimitry Andric   }
13282cab237bSDimitry Andric 
13292cab237bSDimitry Andric   if (!UI) return false;
13302cab237bSDimitry Andric 
13312cab237bSDimitry Andric   // Merge the sin and cos.
13322cab237bSDimitry Andric 
13332cab237bSDimitry Andric   // for OpenCL 2.0 we have only generic implementation of sincos
13342cab237bSDimitry Andric   // function.
13352cab237bSDimitry Andric   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1336*b5893f02SDimitry Andric   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
13372cab237bSDimitry Andric   Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
13382cab237bSDimitry Andric   if (!Fsincos) return false;
13392cab237bSDimitry Andric 
13402cab237bSDimitry Andric   BasicBlock::iterator ItOld = B.GetInsertPoint();
13412cab237bSDimitry Andric   AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
13422cab237bSDimitry Andric   B.SetInsertPoint(UI);
13432cab237bSDimitry Andric 
13442cab237bSDimitry Andric   Value *P = Alloc;
13452cab237bSDimitry Andric   Type *PTy = Fsincos->getFunctionType()->getParamType(1);
13462cab237bSDimitry Andric   // The allocaInst allocates the memory in private address space. This need
13472cab237bSDimitry Andric   // to be bitcasted to point to the address space of cos pointer type.
13482cab237bSDimitry Andric   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1349*b5893f02SDimitry Andric   if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
13502cab237bSDimitry Andric     P = B.CreateAddrSpaceCast(Alloc, PTy);
13512cab237bSDimitry Andric   CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
13522cab237bSDimitry Andric 
13534ba319b5SDimitry Andric   LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
13544ba319b5SDimitry Andric                     << *Call << "\n");
13552cab237bSDimitry Andric 
13562cab237bSDimitry Andric   if (!isSin) { // CI->cos, UI->sin
13572cab237bSDimitry Andric     B.SetInsertPoint(&*ItOld);
13582cab237bSDimitry Andric     UI->replaceAllUsesWith(&*Call);
13592cab237bSDimitry Andric     Instruction *Reload = B.CreateLoad(Alloc);
13602cab237bSDimitry Andric     CI->replaceAllUsesWith(Reload);
13612cab237bSDimitry Andric     UI->eraseFromParent();
13622cab237bSDimitry Andric     CI->eraseFromParent();
13632cab237bSDimitry Andric   } else { // CI->sin, UI->cos
13642cab237bSDimitry Andric     Instruction *Reload = B.CreateLoad(Alloc);
13652cab237bSDimitry Andric     UI->replaceAllUsesWith(Reload);
13662cab237bSDimitry Andric     CI->replaceAllUsesWith(Call);
13672cab237bSDimitry Andric     UI->eraseFromParent();
13682cab237bSDimitry Andric     CI->eraseFromParent();
13692cab237bSDimitry Andric   }
13702cab237bSDimitry Andric   return true;
13712cab237bSDimitry Andric }
13722cab237bSDimitry Andric 
13732cab237bSDimitry Andric // Get insertion point at entry.
getEntryIns(CallInst * UI)13742cab237bSDimitry Andric BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
13752cab237bSDimitry Andric   Function * Func = UI->getParent()->getParent();
13762cab237bSDimitry Andric   BasicBlock * BB = &Func->getEntryBlock();
13772cab237bSDimitry Andric   assert(BB && "Entry block not found!");
13782cab237bSDimitry Andric   BasicBlock::iterator ItNew = BB->begin();
13792cab237bSDimitry Andric   return ItNew;
13802cab237bSDimitry Andric }
13812cab237bSDimitry Andric 
13822cab237bSDimitry Andric // Insert a AllocsInst at the beginning of function entry block.
insertAlloca(CallInst * UI,IRBuilder<> & B,const char * prefix)13832cab237bSDimitry Andric AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
13842cab237bSDimitry Andric                                          const char *prefix) {
13852cab237bSDimitry Andric   BasicBlock::iterator ItNew = getEntryIns(UI);
13862cab237bSDimitry Andric   Function *UCallee = UI->getCalledFunction();
13872cab237bSDimitry Andric   Type *RetType = UCallee->getReturnType();
13882cab237bSDimitry Andric   B.SetInsertPoint(&*ItNew);
13892cab237bSDimitry Andric   AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
13902cab237bSDimitry Andric     std::string(prefix) + UI->getName());
13912cab237bSDimitry Andric   Alloc->setAlignment(UCallee->getParent()->getDataLayout()
13922cab237bSDimitry Andric                        .getTypeAllocSize(RetType));
13932cab237bSDimitry Andric   return Alloc;
13942cab237bSDimitry Andric }
13952cab237bSDimitry Andric 
evaluateScalarMathFunc(FuncInfo & FInfo,double & Res0,double & Res1,Constant * copr0,Constant * copr1,Constant * copr2)13962cab237bSDimitry Andric bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
13972cab237bSDimitry Andric                                             double& Res0, double& Res1,
13982cab237bSDimitry Andric                                             Constant *copr0, Constant *copr1,
13992cab237bSDimitry Andric                                             Constant *copr2) {
14002cab237bSDimitry Andric   // By default, opr0/opr1/opr3 holds values of float/double type.
14012cab237bSDimitry Andric   // If they are not float/double, each function has to its
14022cab237bSDimitry Andric   // operand separately.
14032cab237bSDimitry Andric   double opr0=0.0, opr1=0.0, opr2=0.0;
14042cab237bSDimitry Andric   ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
14052cab237bSDimitry Andric   ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
14062cab237bSDimitry Andric   ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
14072cab237bSDimitry Andric   if (fpopr0) {
14082cab237bSDimitry Andric     opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14092cab237bSDimitry Andric              ? fpopr0->getValueAPF().convertToDouble()
14102cab237bSDimitry Andric              : (double)fpopr0->getValueAPF().convertToFloat();
14112cab237bSDimitry Andric   }
14122cab237bSDimitry Andric 
14132cab237bSDimitry Andric   if (fpopr1) {
14142cab237bSDimitry Andric     opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14152cab237bSDimitry Andric              ? fpopr1->getValueAPF().convertToDouble()
14162cab237bSDimitry Andric              : (double)fpopr1->getValueAPF().convertToFloat();
14172cab237bSDimitry Andric   }
14182cab237bSDimitry Andric 
14192cab237bSDimitry Andric   if (fpopr2) {
14202cab237bSDimitry Andric     opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14212cab237bSDimitry Andric              ? fpopr2->getValueAPF().convertToDouble()
14222cab237bSDimitry Andric              : (double)fpopr2->getValueAPF().convertToFloat();
14232cab237bSDimitry Andric   }
14242cab237bSDimitry Andric 
14252cab237bSDimitry Andric   switch (FInfo.getId()) {
14262cab237bSDimitry Andric   default : return false;
14272cab237bSDimitry Andric 
14282cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOS:
14292cab237bSDimitry Andric     Res0 = acos(opr0);
14302cab237bSDimitry Andric     return true;
14312cab237bSDimitry Andric 
14322cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOSH:
14332cab237bSDimitry Andric     // acosh(x) == log(x + sqrt(x*x - 1))
14342cab237bSDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14352cab237bSDimitry Andric     return true;
14362cab237bSDimitry Andric 
14372cab237bSDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:
14382cab237bSDimitry Andric     Res0 = acos(opr0) / MATH_PI;
14392cab237bSDimitry Andric     return true;
14402cab237bSDimitry Andric 
14412cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASIN:
14422cab237bSDimitry Andric     Res0 = asin(opr0);
14432cab237bSDimitry Andric     return true;
14442cab237bSDimitry Andric 
14452cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASINH:
14462cab237bSDimitry Andric     // asinh(x) == log(x + sqrt(x*x + 1))
14472cab237bSDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
14482cab237bSDimitry Andric     return true;
14492cab237bSDimitry Andric 
14502cab237bSDimitry Andric   case AMDGPULibFunc::EI_ASINPI:
14512cab237bSDimitry Andric     Res0 = asin(opr0) / MATH_PI;
14522cab237bSDimitry Andric     return true;
14532cab237bSDimitry Andric 
14542cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATAN:
14552cab237bSDimitry Andric     Res0 = atan(opr0);
14562cab237bSDimitry Andric     return true;
14572cab237bSDimitry Andric 
14582cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATANH:
14592cab237bSDimitry Andric     // atanh(x) == (log(x+1) - log(x-1))/2;
14602cab237bSDimitry Andric     Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
14612cab237bSDimitry Andric     return true;
14622cab237bSDimitry Andric 
14632cab237bSDimitry Andric   case AMDGPULibFunc::EI_ATANPI:
14642cab237bSDimitry Andric     Res0 = atan(opr0) / MATH_PI;
14652cab237bSDimitry Andric     return true;
14662cab237bSDimitry Andric 
14672cab237bSDimitry Andric   case AMDGPULibFunc::EI_CBRT:
14682cab237bSDimitry Andric     Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
14692cab237bSDimitry Andric     return true;
14702cab237bSDimitry Andric 
14712cab237bSDimitry Andric   case AMDGPULibFunc::EI_COS:
14722cab237bSDimitry Andric     Res0 = cos(opr0);
14732cab237bSDimitry Andric     return true;
14742cab237bSDimitry Andric 
14752cab237bSDimitry Andric   case AMDGPULibFunc::EI_COSH:
14762cab237bSDimitry Andric     Res0 = cosh(opr0);
14772cab237bSDimitry Andric     return true;
14782cab237bSDimitry Andric 
14792cab237bSDimitry Andric   case AMDGPULibFunc::EI_COSPI:
14802cab237bSDimitry Andric     Res0 = cos(MATH_PI * opr0);
14812cab237bSDimitry Andric     return true;
14822cab237bSDimitry Andric 
14832cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP:
14842cab237bSDimitry Andric     Res0 = exp(opr0);
14852cab237bSDimitry Andric     return true;
14862cab237bSDimitry Andric 
14872cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP2:
14882cab237bSDimitry Andric     Res0 = pow(2.0, opr0);
14892cab237bSDimitry Andric     return true;
14902cab237bSDimitry Andric 
14912cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXP10:
14922cab237bSDimitry Andric     Res0 = pow(10.0, opr0);
14932cab237bSDimitry Andric     return true;
14942cab237bSDimitry Andric 
14952cab237bSDimitry Andric   case AMDGPULibFunc::EI_EXPM1:
14962cab237bSDimitry Andric     Res0 = exp(opr0) - 1.0;
14972cab237bSDimitry Andric     return true;
14982cab237bSDimitry Andric 
14992cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG:
15002cab237bSDimitry Andric     Res0 = log(opr0);
15012cab237bSDimitry Andric     return true;
15022cab237bSDimitry Andric 
15032cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG2:
15042cab237bSDimitry Andric     Res0 = log(opr0) / log(2.0);
15052cab237bSDimitry Andric     return true;
15062cab237bSDimitry Andric 
15072cab237bSDimitry Andric   case AMDGPULibFunc::EI_LOG10:
15082cab237bSDimitry Andric     Res0 = log(opr0) / log(10.0);
15092cab237bSDimitry Andric     return true;
15102cab237bSDimitry Andric 
15112cab237bSDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
15122cab237bSDimitry Andric     Res0 = 1.0 / sqrt(opr0);
15132cab237bSDimitry Andric     return true;
15142cab237bSDimitry Andric 
15152cab237bSDimitry Andric   case AMDGPULibFunc::EI_SIN:
15162cab237bSDimitry Andric     Res0 = sin(opr0);
15172cab237bSDimitry Andric     return true;
15182cab237bSDimitry Andric 
15192cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINH:
15202cab237bSDimitry Andric     Res0 = sinh(opr0);
15212cab237bSDimitry Andric     return true;
15222cab237bSDimitry Andric 
15232cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINPI:
15242cab237bSDimitry Andric     Res0 = sin(MATH_PI * opr0);
15252cab237bSDimitry Andric     return true;
15262cab237bSDimitry Andric 
15272cab237bSDimitry Andric   case AMDGPULibFunc::EI_SQRT:
15282cab237bSDimitry Andric     Res0 = sqrt(opr0);
15292cab237bSDimitry Andric     return true;
15302cab237bSDimitry Andric 
15312cab237bSDimitry Andric   case AMDGPULibFunc::EI_TAN:
15322cab237bSDimitry Andric     Res0 = tan(opr0);
15332cab237bSDimitry Andric     return true;
15342cab237bSDimitry Andric 
15352cab237bSDimitry Andric   case AMDGPULibFunc::EI_TANH:
15362cab237bSDimitry Andric     Res0 = tanh(opr0);
15372cab237bSDimitry Andric     return true;
15382cab237bSDimitry Andric 
15392cab237bSDimitry Andric   case AMDGPULibFunc::EI_TANPI:
15402cab237bSDimitry Andric     Res0 = tan(MATH_PI * opr0);
15412cab237bSDimitry Andric     return true;
15422cab237bSDimitry Andric 
15432cab237bSDimitry Andric   case AMDGPULibFunc::EI_RECIP:
15442cab237bSDimitry Andric     Res0 = 1.0 / opr0;
15452cab237bSDimitry Andric     return true;
15462cab237bSDimitry Andric 
15472cab237bSDimitry Andric   // two-arg functions
15482cab237bSDimitry Andric   case AMDGPULibFunc::EI_DIVIDE:
15492cab237bSDimitry Andric     Res0 = opr0 / opr1;
15502cab237bSDimitry Andric     return true;
15512cab237bSDimitry Andric 
15522cab237bSDimitry Andric   case AMDGPULibFunc::EI_POW:
15532cab237bSDimitry Andric   case AMDGPULibFunc::EI_POWR:
15542cab237bSDimitry Andric     Res0 = pow(opr0, opr1);
15552cab237bSDimitry Andric     return true;
15562cab237bSDimitry Andric 
15572cab237bSDimitry Andric   case AMDGPULibFunc::EI_POWN: {
15582cab237bSDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15592cab237bSDimitry Andric       double val = (double)iopr1->getSExtValue();
15602cab237bSDimitry Andric       Res0 = pow(opr0, val);
15612cab237bSDimitry Andric       return true;
15622cab237bSDimitry Andric     }
15632cab237bSDimitry Andric     return false;
15642cab237bSDimitry Andric   }
15652cab237bSDimitry Andric 
15662cab237bSDimitry Andric   case AMDGPULibFunc::EI_ROOTN: {
15672cab237bSDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15682cab237bSDimitry Andric       double val = (double)iopr1->getSExtValue();
15692cab237bSDimitry Andric       Res0 = pow(opr0, 1.0 / val);
15702cab237bSDimitry Andric       return true;
15712cab237bSDimitry Andric     }
15722cab237bSDimitry Andric     return false;
15732cab237bSDimitry Andric   }
15742cab237bSDimitry Andric 
15752cab237bSDimitry Andric   // with ptr arg
15762cab237bSDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
15772cab237bSDimitry Andric     Res0 = sin(opr0);
15782cab237bSDimitry Andric     Res1 = cos(opr0);
15792cab237bSDimitry Andric     return true;
15802cab237bSDimitry Andric 
15812cab237bSDimitry Andric   // three-arg functions
15822cab237bSDimitry Andric   case AMDGPULibFunc::EI_FMA:
15832cab237bSDimitry Andric   case AMDGPULibFunc::EI_MAD:
15842cab237bSDimitry Andric     Res0 = opr0 * opr1 + opr2;
15852cab237bSDimitry Andric     return true;
15862cab237bSDimitry Andric   }
15872cab237bSDimitry Andric 
15882cab237bSDimitry Andric   return false;
15892cab237bSDimitry Andric }
15902cab237bSDimitry Andric 
evaluateCall(CallInst * aCI,FuncInfo & FInfo)15912cab237bSDimitry Andric bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
15922cab237bSDimitry Andric   int numArgs = (int)aCI->getNumArgOperands();
15932cab237bSDimitry Andric   if (numArgs > 3)
15942cab237bSDimitry Andric     return false;
15952cab237bSDimitry Andric 
15962cab237bSDimitry Andric   Constant *copr0 = nullptr;
15972cab237bSDimitry Andric   Constant *copr1 = nullptr;
15982cab237bSDimitry Andric   Constant *copr2 = nullptr;
15992cab237bSDimitry Andric   if (numArgs > 0) {
16002cab237bSDimitry Andric     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
16012cab237bSDimitry Andric       return false;
16022cab237bSDimitry Andric   }
16032cab237bSDimitry Andric 
16042cab237bSDimitry Andric   if (numArgs > 1) {
16052cab237bSDimitry Andric     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
16062cab237bSDimitry Andric       if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
16072cab237bSDimitry Andric         return false;
16082cab237bSDimitry Andric     }
16092cab237bSDimitry Andric   }
16102cab237bSDimitry Andric 
16112cab237bSDimitry Andric   if (numArgs > 2) {
16122cab237bSDimitry Andric     if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
16132cab237bSDimitry Andric       return false;
16142cab237bSDimitry Andric   }
16152cab237bSDimitry Andric 
16162cab237bSDimitry Andric   // At this point, all arguments to aCI are constants.
16172cab237bSDimitry Andric 
16182cab237bSDimitry Andric   // max vector size is 16, and sincos will generate two results.
16192cab237bSDimitry Andric   double DVal0[16], DVal1[16];
16202cab237bSDimitry Andric   bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
16212cab237bSDimitry Andric   if (getVecSize(FInfo) == 1) {
16222cab237bSDimitry Andric     if (!evaluateScalarMathFunc(FInfo, DVal0[0],
16232cab237bSDimitry Andric                                 DVal1[0], copr0, copr1, copr2)) {
16242cab237bSDimitry Andric       return false;
16252cab237bSDimitry Andric     }
16262cab237bSDimitry Andric   } else {
16272cab237bSDimitry Andric     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
16282cab237bSDimitry Andric     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
16292cab237bSDimitry Andric     ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
16302cab237bSDimitry Andric     for (int i=0; i < getVecSize(FInfo); ++i) {
16312cab237bSDimitry Andric       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16322cab237bSDimitry Andric       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16332cab237bSDimitry Andric       Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
16342cab237bSDimitry Andric       if (!evaluateScalarMathFunc(FInfo, DVal0[i],
16352cab237bSDimitry Andric                                   DVal1[i], celt0, celt1, celt2)) {
16362cab237bSDimitry Andric         return false;
16372cab237bSDimitry Andric       }
16382cab237bSDimitry Andric     }
16392cab237bSDimitry Andric   }
16402cab237bSDimitry Andric 
16412cab237bSDimitry Andric   LLVMContext &context = CI->getParent()->getParent()->getContext();
16422cab237bSDimitry Andric   Constant *nval0, *nval1;
16432cab237bSDimitry Andric   if (getVecSize(FInfo) == 1) {
16442cab237bSDimitry Andric     nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
16452cab237bSDimitry Andric     if (hasTwoResults)
16462cab237bSDimitry Andric       nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
16472cab237bSDimitry Andric   } else {
16482cab237bSDimitry Andric     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16492cab237bSDimitry Andric       SmallVector <float, 0> FVal0, FVal1;
16502cab237bSDimitry Andric       for (int i=0; i < getVecSize(FInfo); ++i)
16512cab237bSDimitry Andric         FVal0.push_back((float)DVal0[i]);
16522cab237bSDimitry Andric       ArrayRef<float> tmp0(FVal0);
16532cab237bSDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16542cab237bSDimitry Andric       if (hasTwoResults) {
16552cab237bSDimitry Andric         for (int i=0; i < getVecSize(FInfo); ++i)
16562cab237bSDimitry Andric           FVal1.push_back((float)DVal1[i]);
16572cab237bSDimitry Andric         ArrayRef<float> tmp1(FVal1);
16582cab237bSDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16592cab237bSDimitry Andric       }
16602cab237bSDimitry Andric     } else {
16612cab237bSDimitry Andric       ArrayRef<double> tmp0(DVal0);
16622cab237bSDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16632cab237bSDimitry Andric       if (hasTwoResults) {
16642cab237bSDimitry Andric         ArrayRef<double> tmp1(DVal1);
16652cab237bSDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16662cab237bSDimitry Andric       }
16672cab237bSDimitry Andric     }
16682cab237bSDimitry Andric   }
16692cab237bSDimitry Andric 
16702cab237bSDimitry Andric   if (hasTwoResults) {
16712cab237bSDimitry Andric     // sincos
16722cab237bSDimitry Andric     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16732cab237bSDimitry Andric            "math function with ptr arg not supported yet");
16742cab237bSDimitry Andric     new StoreInst(nval1, aCI->getArgOperand(1), aCI);
16752cab237bSDimitry Andric   }
16762cab237bSDimitry Andric 
16772cab237bSDimitry Andric   replaceCall(nval0);
16782cab237bSDimitry Andric   return true;
16792cab237bSDimitry Andric }
16802cab237bSDimitry Andric 
16812cab237bSDimitry Andric // Public interface to the Simplify LibCalls pass.
createAMDGPUSimplifyLibCallsPass(const TargetOptions & Opt)16822cab237bSDimitry Andric FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
16832cab237bSDimitry Andric   return new AMDGPUSimplifyLibCalls(Opt);
16842cab237bSDimitry Andric }
16852cab237bSDimitry Andric 
createAMDGPUUseNativeCallsPass()16862cab237bSDimitry Andric FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
16872cab237bSDimitry Andric   return new AMDGPUUseNativeCalls();
16882cab237bSDimitry Andric }
16892cab237bSDimitry Andric 
setFastFlags(Function & F,const TargetOptions & Options)16902cab237bSDimitry Andric static bool setFastFlags(Function &F, const TargetOptions &Options) {
16912cab237bSDimitry Andric   AttrBuilder B;
16922cab237bSDimitry Andric 
16932cab237bSDimitry Andric   if (Options.UnsafeFPMath || Options.NoInfsFPMath)
16942cab237bSDimitry Andric     B.addAttribute("no-infs-fp-math", "true");
16952cab237bSDimitry Andric   if (Options.UnsafeFPMath || Options.NoNaNsFPMath)
16962cab237bSDimitry Andric     B.addAttribute("no-nans-fp-math", "true");
16972cab237bSDimitry Andric   if (Options.UnsafeFPMath) {
16982cab237bSDimitry Andric     B.addAttribute("less-precise-fpmad", "true");
16992cab237bSDimitry Andric     B.addAttribute("unsafe-fp-math", "true");
17002cab237bSDimitry Andric   }
17012cab237bSDimitry Andric 
17022cab237bSDimitry Andric   if (!B.hasAttributes())
17032cab237bSDimitry Andric     return false;
17042cab237bSDimitry Andric 
17052cab237bSDimitry Andric   F.addAttributes(AttributeList::FunctionIndex, B);
17062cab237bSDimitry Andric 
17072cab237bSDimitry Andric   return true;
17082cab237bSDimitry Andric }
17092cab237bSDimitry Andric 
runOnFunction(Function & F)17102cab237bSDimitry Andric bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
17112cab237bSDimitry Andric   if (skipFunction(F))
17122cab237bSDimitry Andric     return false;
17132cab237bSDimitry Andric 
17142cab237bSDimitry Andric   bool Changed = false;
17152cab237bSDimitry Andric   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
17162cab237bSDimitry Andric 
17174ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
17184ba319b5SDimitry Andric              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
17192cab237bSDimitry Andric 
17202cab237bSDimitry Andric   if (!EnablePreLink)
17212cab237bSDimitry Andric     Changed |= setFastFlags(F, Options);
17222cab237bSDimitry Andric 
17232cab237bSDimitry Andric   for (auto &BB : F) {
17242cab237bSDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17252cab237bSDimitry Andric       // Ignore non-calls.
17262cab237bSDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
17272cab237bSDimitry Andric       ++I;
17282cab237bSDimitry Andric       if (!CI) continue;
17292cab237bSDimitry Andric 
17302cab237bSDimitry Andric       // Ignore indirect calls.
17312cab237bSDimitry Andric       Function *Callee = CI->getCalledFunction();
17322cab237bSDimitry Andric       if (Callee == 0) continue;
17332cab237bSDimitry Andric 
17344ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
17352cab237bSDimitry Andric                  dbgs().flush());
17362cab237bSDimitry Andric       if(Simplifier.fold(CI, AA))
17372cab237bSDimitry Andric         Changed = true;
17382cab237bSDimitry Andric     }
17392cab237bSDimitry Andric   }
17402cab237bSDimitry Andric   return Changed;
17412cab237bSDimitry Andric }
17422cab237bSDimitry Andric 
runOnFunction(Function & F)17432cab237bSDimitry Andric bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
17442cab237bSDimitry Andric   if (skipFunction(F) || UseNative.empty())
17452cab237bSDimitry Andric     return false;
17462cab237bSDimitry Andric 
17472cab237bSDimitry Andric   bool Changed = false;
17482cab237bSDimitry Andric   for (auto &BB : F) {
17492cab237bSDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17502cab237bSDimitry Andric       // Ignore non-calls.
17512cab237bSDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
17522cab237bSDimitry Andric       ++I;
17532cab237bSDimitry Andric       if (!CI) continue;
17542cab237bSDimitry Andric 
17552cab237bSDimitry Andric       // Ignore indirect calls.
17562cab237bSDimitry Andric       Function *Callee = CI->getCalledFunction();
17572cab237bSDimitry Andric       if (Callee == 0) continue;
17582cab237bSDimitry Andric 
17592cab237bSDimitry Andric       if(Simplifier.useNative(CI))
17602cab237bSDimitry Andric         Changed = true;
17612cab237bSDimitry Andric     }
17622cab237bSDimitry Andric   }
17632cab237bSDimitry Andric   return Changed;
17642cab237bSDimitry Andric }
1765