17f37794eSStanislav Mekhanoshin //===- AMDGPULibCalls.cpp -------------------------------------------------===//
27f37794eSStanislav Mekhanoshin //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67f37794eSStanislav Mekhanoshin //
77f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
87f37794eSStanislav Mekhanoshin //
97f37794eSStanislav Mekhanoshin /// \file
105f8f34e4SAdrian Prantl /// This file does AMD library function optimizations.
117f37794eSStanislav Mekhanoshin //
127f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
137f37794eSStanislav Mekhanoshin 
147f37794eSStanislav Mekhanoshin #include "AMDGPU.h"
157f37794eSStanislav Mekhanoshin #include "AMDGPULibFunc.h"
16560d7e04Sdfukalov #include "GCNSubtarget.h"
177f37794eSStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h"
187f37794eSStanislav Mekhanoshin #include "llvm/Analysis/Loads.h"
196a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
2099142003SNikita Popov #include "llvm/IR/IRBuilder.h"
2105da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
22a9191c84SStanislav Mekhanoshin #include "llvm/Target/TargetMachine.h"
2305da2fe5SReid Kleckner 
2405da2fe5SReid Kleckner #define DEBUG_TYPE "amdgpu-simplifylib"
257f37794eSStanislav Mekhanoshin 
267f37794eSStanislav Mekhanoshin using namespace llvm;
277f37794eSStanislav Mekhanoshin 
287f37794eSStanislav Mekhanoshin static cl::opt<bool> EnablePreLink("amdgpu-prelink",
297f37794eSStanislav Mekhanoshin   cl::desc("Enable pre-link mode optimizations"),
307f37794eSStanislav Mekhanoshin   cl::init(false),
317f37794eSStanislav Mekhanoshin   cl::Hidden);
327f37794eSStanislav Mekhanoshin 
337f37794eSStanislav Mekhanoshin static cl::list<std::string> UseNative("amdgpu-use-native",
347f37794eSStanislav Mekhanoshin   cl::desc("Comma separated list of functions to replace with native, or all"),
357f37794eSStanislav Mekhanoshin   cl::CommaSeparated, cl::ValueOptional,
367f37794eSStanislav Mekhanoshin   cl::Hidden);
377f37794eSStanislav Mekhanoshin 
38c57a9dc4SEvandro Menezes #define MATH_PI      numbers::pi
39c57a9dc4SEvandro Menezes #define MATH_E       numbers::e
40c57a9dc4SEvandro Menezes #define MATH_SQRT2   numbers::sqrt2
41c57a9dc4SEvandro Menezes #define MATH_SQRT1_2 numbers::inv_sqrt2
427f37794eSStanislav Mekhanoshin 
437f37794eSStanislav Mekhanoshin namespace llvm {
447f37794eSStanislav Mekhanoshin 
457f37794eSStanislav Mekhanoshin class AMDGPULibCalls {
467f37794eSStanislav Mekhanoshin private:
477f37794eSStanislav Mekhanoshin 
487f37794eSStanislav Mekhanoshin   typedef llvm::AMDGPULibFunc FuncInfo;
497f37794eSStanislav Mekhanoshin 
50a9191c84SStanislav Mekhanoshin   const TargetMachine *TM;
51a9191c84SStanislav Mekhanoshin 
527f37794eSStanislav Mekhanoshin   // -fuse-native.
537f37794eSStanislav Mekhanoshin   bool AllNative = false;
547f37794eSStanislav Mekhanoshin 
557f37794eSStanislav Mekhanoshin   bool useNativeFunc(const StringRef F) const;
567f37794eSStanislav Mekhanoshin 
57*dc6e8dfdSJacob Lambert   // Return a pointer (pointer expr) to the function if function definition with
587f37794eSStanislav Mekhanoshin   // "FuncName" exists. It may create a new function prototype in pre-link mode.
5913680223SJames Y Knight   FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
607f37794eSStanislav Mekhanoshin 
617f37794eSStanislav Mekhanoshin   // Replace a normal function with its native version.
627f37794eSStanislav Mekhanoshin   bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
637f37794eSStanislav Mekhanoshin 
647f37794eSStanislav Mekhanoshin   bool parseFunctionName(const StringRef& FMangledName,
657f37794eSStanislav Mekhanoshin                          FuncInfo *FInfo=nullptr /*out*/);
667f37794eSStanislav Mekhanoshin 
677f37794eSStanislav Mekhanoshin   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
687f37794eSStanislav Mekhanoshin 
697f37794eSStanislav Mekhanoshin   /* Specialized optimizations */
707f37794eSStanislav Mekhanoshin 
717f37794eSStanislav Mekhanoshin   // recip (half or native)
727f37794eSStanislav Mekhanoshin   bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
737f37794eSStanislav Mekhanoshin 
747f37794eSStanislav Mekhanoshin   // divide (half or native)
757f37794eSStanislav Mekhanoshin   bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
767f37794eSStanislav Mekhanoshin 
777f37794eSStanislav Mekhanoshin   // pow/powr/pown
787f37794eSStanislav Mekhanoshin   bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
797f37794eSStanislav Mekhanoshin 
807f37794eSStanislav Mekhanoshin   // rootn
817f37794eSStanislav Mekhanoshin   bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
827f37794eSStanislav Mekhanoshin 
837f37794eSStanislav Mekhanoshin   // fma/mad
847f37794eSStanislav Mekhanoshin   bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
857f37794eSStanislav Mekhanoshin 
867f37794eSStanislav Mekhanoshin   // -fuse-native for sincos
877f37794eSStanislav Mekhanoshin   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
887f37794eSStanislav Mekhanoshin 
897f37794eSStanislav Mekhanoshin   // evaluate calls if calls' arguments are constants.
907f37794eSStanislav Mekhanoshin   bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
917f37794eSStanislav Mekhanoshin     double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
927f37794eSStanislav Mekhanoshin   bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
937f37794eSStanislav Mekhanoshin 
947f37794eSStanislav Mekhanoshin   // exp
957f37794eSStanislav Mekhanoshin   bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
967f37794eSStanislav Mekhanoshin 
977f37794eSStanislav Mekhanoshin   // exp2
987f37794eSStanislav Mekhanoshin   bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
997f37794eSStanislav Mekhanoshin 
1007f37794eSStanislav Mekhanoshin   // exp10
1017f37794eSStanislav Mekhanoshin   bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1027f37794eSStanislav Mekhanoshin 
1037f37794eSStanislav Mekhanoshin   // log
1047f37794eSStanislav Mekhanoshin   bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1057f37794eSStanislav Mekhanoshin 
1067f37794eSStanislav Mekhanoshin   // log2
1077f37794eSStanislav Mekhanoshin   bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1087f37794eSStanislav Mekhanoshin 
1097f37794eSStanislav Mekhanoshin   // log10
1107f37794eSStanislav Mekhanoshin   bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1117f37794eSStanislav Mekhanoshin 
1127f37794eSStanislav Mekhanoshin   // sqrt
1137f37794eSStanislav Mekhanoshin   bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1147f37794eSStanislav Mekhanoshin 
1157f37794eSStanislav Mekhanoshin   // sin/cos
1167f37794eSStanislav Mekhanoshin   bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
1177f37794eSStanislav Mekhanoshin 
118fc5121a7SYaxun Liu   // __read_pipe/__write_pipe
119fc5121a7SYaxun Liu   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
120fc5121a7SYaxun Liu 
121a9191c84SStanislav Mekhanoshin   // llvm.amdgcn.wavefrontsize
122a9191c84SStanislav Mekhanoshin   bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
123a9191c84SStanislav Mekhanoshin 
1247f37794eSStanislav Mekhanoshin   // Get insertion point at entry.
1257f37794eSStanislav Mekhanoshin   BasicBlock::iterator getEntryIns(CallInst * UI);
1267f37794eSStanislav Mekhanoshin   // Insert an Alloc instruction.
1277f37794eSStanislav Mekhanoshin   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
1287f37794eSStanislav Mekhanoshin   // Get a scalar native builtin signle argument FP function
12913680223SJames Y Knight   FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
1307f37794eSStanislav Mekhanoshin 
1317f37794eSStanislav Mekhanoshin protected:
1327f37794eSStanislav Mekhanoshin   CallInst *CI;
1337f37794eSStanislav Mekhanoshin 
1347f37794eSStanislav Mekhanoshin   bool isUnsafeMath(const CallInst *CI) const;
1357f37794eSStanislav Mekhanoshin 
1367f37794eSStanislav Mekhanoshin   void replaceCall(Value *With) {
1377f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(With);
1387f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
1397f37794eSStanislav Mekhanoshin   }
1407f37794eSStanislav Mekhanoshin 
1417f37794eSStanislav Mekhanoshin public:
142a9191c84SStanislav Mekhanoshin   AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
143a9191c84SStanislav Mekhanoshin 
1447f37794eSStanislav Mekhanoshin   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
1457f37794eSStanislav Mekhanoshin 
1467f37794eSStanislav Mekhanoshin   void initNativeFuncs();
1477f37794eSStanislav Mekhanoshin 
1487f37794eSStanislav Mekhanoshin   // Replace a normal math function call with that native version
1497f37794eSStanislav Mekhanoshin   bool useNative(CallInst *CI);
1507f37794eSStanislav Mekhanoshin };
1517f37794eSStanislav Mekhanoshin 
1527f37794eSStanislav Mekhanoshin } // end llvm namespace
1537f37794eSStanislav Mekhanoshin 
1547f37794eSStanislav Mekhanoshin namespace {
1557f37794eSStanislav Mekhanoshin 
1567f37794eSStanislav Mekhanoshin   class AMDGPUSimplifyLibCalls : public FunctionPass {
1577f37794eSStanislav Mekhanoshin 
158a9191c84SStanislav Mekhanoshin   AMDGPULibCalls Simplifier;
159a9191c84SStanislav Mekhanoshin 
1607f37794eSStanislav Mekhanoshin   public:
1617f37794eSStanislav Mekhanoshin     static char ID; // Pass identification
1627f37794eSStanislav Mekhanoshin 
163348735b7SMatt Arsenault     AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
164348735b7SMatt Arsenault       : FunctionPass(ID), Simplifier(TM) {
1657f37794eSStanislav Mekhanoshin       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
1667f37794eSStanislav Mekhanoshin     }
1677f37794eSStanislav Mekhanoshin 
1687f37794eSStanislav Mekhanoshin     void getAnalysisUsage(AnalysisUsage &AU) const override {
1697f37794eSStanislav Mekhanoshin       AU.addRequired<AAResultsWrapperPass>();
1707f37794eSStanislav Mekhanoshin     }
1717f37794eSStanislav Mekhanoshin 
1727f37794eSStanislav Mekhanoshin     bool runOnFunction(Function &M) override;
1737f37794eSStanislav Mekhanoshin   };
1747f37794eSStanislav Mekhanoshin 
1757f37794eSStanislav Mekhanoshin   class AMDGPUUseNativeCalls : public FunctionPass {
1767f37794eSStanislav Mekhanoshin 
1777f37794eSStanislav Mekhanoshin   AMDGPULibCalls Simplifier;
1787f37794eSStanislav Mekhanoshin 
1797f37794eSStanislav Mekhanoshin   public:
1807f37794eSStanislav Mekhanoshin     static char ID; // Pass identification
1817f37794eSStanislav Mekhanoshin 
1827f37794eSStanislav Mekhanoshin     AMDGPUUseNativeCalls() : FunctionPass(ID) {
1837f37794eSStanislav Mekhanoshin       initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
1847f37794eSStanislav Mekhanoshin       Simplifier.initNativeFuncs();
1857f37794eSStanislav Mekhanoshin     }
1867f37794eSStanislav Mekhanoshin 
1877f37794eSStanislav Mekhanoshin     bool runOnFunction(Function &F) override;
1887f37794eSStanislav Mekhanoshin   };
1897f37794eSStanislav Mekhanoshin 
1907f37794eSStanislav Mekhanoshin } // end anonymous namespace.
1917f37794eSStanislav Mekhanoshin 
1927f37794eSStanislav Mekhanoshin char AMDGPUSimplifyLibCalls::ID = 0;
1937f37794eSStanislav Mekhanoshin char AMDGPUUseNativeCalls::ID = 0;
1947f37794eSStanislav Mekhanoshin 
1957f37794eSStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
1967f37794eSStanislav Mekhanoshin                       "Simplify well-known AMD library calls", false, false)
1977f37794eSStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1987f37794eSStanislav Mekhanoshin INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
1997f37794eSStanislav Mekhanoshin                     "Simplify well-known AMD library calls", false, false)
2007f37794eSStanislav Mekhanoshin 
2017f37794eSStanislav Mekhanoshin INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
2027f37794eSStanislav Mekhanoshin                 "Replace builtin math calls with that native versions.",
2037f37794eSStanislav Mekhanoshin                 false, false)
2047f37794eSStanislav Mekhanoshin 
2057f37794eSStanislav Mekhanoshin template <typename IRB>
20613680223SJames Y Knight static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
20751ebcaafSBenjamin Kramer                               const Twine &Name = "") {
2087f37794eSStanislav Mekhanoshin   CallInst *R = B.CreateCall(Callee, Arg, Name);
20913680223SJames Y Knight   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
2107f37794eSStanislav Mekhanoshin     R->setCallingConv(F->getCallingConv());
2117f37794eSStanislav Mekhanoshin   return R;
2127f37794eSStanislav Mekhanoshin }
2137f37794eSStanislav Mekhanoshin 
2147f37794eSStanislav Mekhanoshin template <typename IRB>
21513680223SJames Y Knight static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
21613680223SJames Y Knight                                Value *Arg2, const Twine &Name = "") {
2177f37794eSStanislav Mekhanoshin   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
21813680223SJames Y Knight   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
2197f37794eSStanislav Mekhanoshin     R->setCallingConv(F->getCallingConv());
2207f37794eSStanislav Mekhanoshin   return R;
2217f37794eSStanislav Mekhanoshin }
2227f37794eSStanislav Mekhanoshin 
2237f37794eSStanislav Mekhanoshin //  Data structures for table-driven optimizations.
2247f37794eSStanislav Mekhanoshin //  FuncTbl works for both f32 and f64 functions with 1 input argument
2257f37794eSStanislav Mekhanoshin 
/// One row of a folding table. Aggregate initializers list the result first
/// and the input second.
struct TableEntry {
  double result; // value stored for the paired input
  double input;  // constant argument this row is keyed on
};
2307f37794eSStanislav Mekhanoshin 
2317f37794eSStanislav Mekhanoshin /* a list of {result, input} */
2327f37794eSStanislav Mekhanoshin static const TableEntry tbl_acos[] = {
2337f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, 0.0},
2347f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, -0.0},
2357f37794eSStanislav Mekhanoshin   {0.0, 1.0},
2367f37794eSStanislav Mekhanoshin   {MATH_PI, -1.0}
2377f37794eSStanislav Mekhanoshin };
2387f37794eSStanislav Mekhanoshin static const TableEntry tbl_acosh[] = {
2397f37794eSStanislav Mekhanoshin   {0.0, 1.0}
2407f37794eSStanislav Mekhanoshin };
2417f37794eSStanislav Mekhanoshin static const TableEntry tbl_acospi[] = {
2427f37794eSStanislav Mekhanoshin   {0.5, 0.0},
2437f37794eSStanislav Mekhanoshin   {0.5, -0.0},
2447f37794eSStanislav Mekhanoshin   {0.0, 1.0},
2457f37794eSStanislav Mekhanoshin   {1.0, -1.0}
2467f37794eSStanislav Mekhanoshin };
2477f37794eSStanislav Mekhanoshin static const TableEntry tbl_asin[] = {
2487f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2497f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2507f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, 1.0},
2517f37794eSStanislav Mekhanoshin   {-MATH_PI / 2.0, -1.0}
2527f37794eSStanislav Mekhanoshin };
2537f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinh[] = {
2547f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2557f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
2567f37794eSStanislav Mekhanoshin };
2577f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinpi[] = {
2587f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2597f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2607f37794eSStanislav Mekhanoshin   {0.5, 1.0},
2617f37794eSStanislav Mekhanoshin   {-0.5, -1.0}
2627f37794eSStanislav Mekhanoshin };
2637f37794eSStanislav Mekhanoshin static const TableEntry tbl_atan[] = {
2647f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2657f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2667f37794eSStanislav Mekhanoshin   {MATH_PI / 4.0, 1.0},
2677f37794eSStanislav Mekhanoshin   {-MATH_PI / 4.0, -1.0}
2687f37794eSStanislav Mekhanoshin };
2697f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanh[] = {
2707f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2717f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
2727f37794eSStanislav Mekhanoshin };
2737f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanpi[] = {
2747f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2757f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2767f37794eSStanislav Mekhanoshin   {0.25, 1.0},
2777f37794eSStanislav Mekhanoshin   {-0.25, -1.0}
2787f37794eSStanislav Mekhanoshin };
2797f37794eSStanislav Mekhanoshin static const TableEntry tbl_cbrt[] = {
2807f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2817f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2827f37794eSStanislav Mekhanoshin   {1.0, 1.0},
2837f37794eSStanislav Mekhanoshin   {-1.0, -1.0},
2847f37794eSStanislav Mekhanoshin };
2857f37794eSStanislav Mekhanoshin static const TableEntry tbl_cos[] = {
2867f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2877f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2887f37794eSStanislav Mekhanoshin };
2897f37794eSStanislav Mekhanoshin static const TableEntry tbl_cosh[] = {
2907f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2917f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2927f37794eSStanislav Mekhanoshin };
2937f37794eSStanislav Mekhanoshin static const TableEntry tbl_cospi[] = {
2947f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2957f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2967f37794eSStanislav Mekhanoshin };
2977f37794eSStanislav Mekhanoshin static const TableEntry tbl_erfc[] = {
2987f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2997f37794eSStanislav Mekhanoshin   {1.0, -0.0}
3007f37794eSStanislav Mekhanoshin };
3017f37794eSStanislav Mekhanoshin static const TableEntry tbl_erf[] = {
3027f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3037f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3047f37794eSStanislav Mekhanoshin };
3057f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp[] = {
3067f37794eSStanislav Mekhanoshin   {1.0, 0.0},
3077f37794eSStanislav Mekhanoshin   {1.0, -0.0},
3087f37794eSStanislav Mekhanoshin   {MATH_E, 1.0}
3097f37794eSStanislav Mekhanoshin };
3107f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp2[] = {
3117f37794eSStanislav Mekhanoshin   {1.0, 0.0},
3127f37794eSStanislav Mekhanoshin   {1.0, -0.0},
3137f37794eSStanislav Mekhanoshin   {2.0, 1.0}
3147f37794eSStanislav Mekhanoshin };
3157f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp10[] = {
3167f37794eSStanislav Mekhanoshin   {1.0, 0.0},
3177f37794eSStanislav Mekhanoshin   {1.0, -0.0},
3187f37794eSStanislav Mekhanoshin   {10.0, 1.0}
3197f37794eSStanislav Mekhanoshin };
3207f37794eSStanislav Mekhanoshin static const TableEntry tbl_expm1[] = {
3217f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3227f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3237f37794eSStanislav Mekhanoshin };
3247f37794eSStanislav Mekhanoshin static const TableEntry tbl_log[] = {
3257f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3267f37794eSStanislav Mekhanoshin   {1.0, MATH_E}
3277f37794eSStanislav Mekhanoshin };
3287f37794eSStanislav Mekhanoshin static const TableEntry tbl_log2[] = {
3297f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3307f37794eSStanislav Mekhanoshin   {1.0, 2.0}
3317f37794eSStanislav Mekhanoshin };
3327f37794eSStanislav Mekhanoshin static const TableEntry tbl_log10[] = {
3337f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3347f37794eSStanislav Mekhanoshin   {1.0, 10.0}
3357f37794eSStanislav Mekhanoshin };
3367f37794eSStanislav Mekhanoshin static const TableEntry tbl_rsqrt[] = {
3377f37794eSStanislav Mekhanoshin   {1.0, 1.0},
338c57a9dc4SEvandro Menezes   {MATH_SQRT1_2, 2.0}
3397f37794eSStanislav Mekhanoshin };
3407f37794eSStanislav Mekhanoshin static const TableEntry tbl_sin[] = {
3417f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3427f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3437f37794eSStanislav Mekhanoshin };
3447f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinh[] = {
3457f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3467f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3477f37794eSStanislav Mekhanoshin };
3487f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinpi[] = {
3497f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3507f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3517f37794eSStanislav Mekhanoshin };
3527f37794eSStanislav Mekhanoshin static const TableEntry tbl_sqrt[] = {
3537f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3547f37794eSStanislav Mekhanoshin   {1.0, 1.0},
3557f37794eSStanislav Mekhanoshin   {MATH_SQRT2, 2.0}
3567f37794eSStanislav Mekhanoshin };
3577f37794eSStanislav Mekhanoshin static const TableEntry tbl_tan[] = {
3587f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3597f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3607f37794eSStanislav Mekhanoshin };
3617f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanh[] = {
3627f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3637f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3647f37794eSStanislav Mekhanoshin };
3657f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanpi[] = {
3667f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3677f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3687f37794eSStanislav Mekhanoshin };
3697f37794eSStanislav Mekhanoshin static const TableEntry tbl_tgamma[] = {
3707f37794eSStanislav Mekhanoshin   {1.0, 1.0},
3717f37794eSStanislav Mekhanoshin   {1.0, 2.0},
3727f37794eSStanislav Mekhanoshin   {2.0, 3.0},
3737f37794eSStanislav Mekhanoshin   {6.0, 4.0}
3747f37794eSStanislav Mekhanoshin };
3757f37794eSStanislav Mekhanoshin 
3767f37794eSStanislav Mekhanoshin static bool HasNative(AMDGPULibFunc::EFuncId id) {
3777f37794eSStanislav Mekhanoshin   switch(id) {
3787f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
3797f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
3807f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:
3817f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:
3827f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:
3837f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:
3847f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:
3857f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:
3867f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
3877f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
3887f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:
3897f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
3907f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINCOS:
3917f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
3927f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:
3937f37794eSStanislav Mekhanoshin     return true;
3947f37794eSStanislav Mekhanoshin   default:;
3957f37794eSStanislav Mekhanoshin   }
3967f37794eSStanislav Mekhanoshin   return false;
3977f37794eSStanislav Mekhanoshin }
3987f37794eSStanislav Mekhanoshin 
3997f37794eSStanislav Mekhanoshin struct TableRef {
4007f37794eSStanislav Mekhanoshin   size_t size;
4017f37794eSStanislav Mekhanoshin   const TableEntry *table; // variable size: from 0 to (size - 1)
4027f37794eSStanislav Mekhanoshin 
4037f37794eSStanislav Mekhanoshin   TableRef() : size(0), table(nullptr) {}
4047f37794eSStanislav Mekhanoshin 
4057f37794eSStanislav Mekhanoshin   template <size_t N>
4067f37794eSStanislav Mekhanoshin   TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
4077f37794eSStanislav Mekhanoshin };
4087f37794eSStanislav Mekhanoshin 
4097f37794eSStanislav Mekhanoshin static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
4107f37794eSStanislav Mekhanoshin   switch(id) {
4117f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
4127f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
4137f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
4147f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
4157f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
4167f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
4177f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
4187f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
4197f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
4207f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
4217f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NCOS:
4227f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
4237f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
4247f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
4257f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
4267f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
4277f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
4287f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NEXP2:
4297f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
4307f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
4317f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
4327f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
4337f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NLOG2:
4347f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
4357f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
4367f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NRSQRT:
4377f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
4387f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NSIN:
4397f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
4407f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
4417f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
4427f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NSQRT:
4437f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
4447f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
4457f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
4467f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
4477f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
4487f37794eSStanislav Mekhanoshin   default:;
4497f37794eSStanislav Mekhanoshin   }
4507f37794eSStanislav Mekhanoshin   return TableRef();
4517f37794eSStanislav Mekhanoshin }
4527f37794eSStanislav Mekhanoshin 
4537f37794eSStanislav Mekhanoshin static inline int getVecSize(const AMDGPULibFunc& FInfo) {
454fc5121a7SYaxun Liu   return FInfo.getLeads()[0].VectorSize;
4557f37794eSStanislav Mekhanoshin }
4567f37794eSStanislav Mekhanoshin 
4577f37794eSStanislav Mekhanoshin static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
458fc5121a7SYaxun Liu   return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
4597f37794eSStanislav Mekhanoshin }
4607f37794eSStanislav Mekhanoshin 
46113680223SJames Y Knight FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
4627f37794eSStanislav Mekhanoshin   // If we are doing PreLinkOpt, the function is external. So it is safe to
4637f37794eSStanislav Mekhanoshin   // use getOrInsertFunction() at this stage.
4647f37794eSStanislav Mekhanoshin 
4657f37794eSStanislav Mekhanoshin   return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4667f37794eSStanislav Mekhanoshin                        : AMDGPULibFunc::getFunction(M, fInfo);
4677f37794eSStanislav Mekhanoshin }
4687f37794eSStanislav Mekhanoshin 
4697f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
4707f37794eSStanislav Mekhanoshin                                     FuncInfo *FInfo) {
4717f37794eSStanislav Mekhanoshin   return AMDGPULibFunc::parse(FMangledName, *FInfo);
4727f37794eSStanislav Mekhanoshin }
4737f37794eSStanislav Mekhanoshin 
4747f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
4757f37794eSStanislav Mekhanoshin   if (auto Op = dyn_cast<FPMathOperator>(CI))
476629c4115SSanjay Patel     if (Op->isFast())
4777f37794eSStanislav Mekhanoshin       return true;
4787f37794eSStanislav Mekhanoshin   const Function *F = CI->getParent()->getParent();
4797f37794eSStanislav Mekhanoshin   Attribute Attr = F->getFnAttribute("unsafe-fp-math");
480d6de1e1aSSerge Guelton   return Attr.getValueAsBool();
4817f37794eSStanislav Mekhanoshin }
4827f37794eSStanislav Mekhanoshin 
4837f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
484902cbcd5SKazu Hirata   return AllNative || llvm::is_contained(UseNative, F);
4857f37794eSStanislav Mekhanoshin }
4867f37794eSStanislav Mekhanoshin 
4877f37794eSStanislav Mekhanoshin void AMDGPULibCalls::initNativeFuncs() {
4887f37794eSStanislav Mekhanoshin   AllNative = useNativeFunc("all") ||
4897f37794eSStanislav Mekhanoshin               (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
4907f37794eSStanislav Mekhanoshin                UseNative.begin()->empty());
4917f37794eSStanislav Mekhanoshin }
4927f37794eSStanislav Mekhanoshin 
4937f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
4947f37794eSStanislav Mekhanoshin   bool native_sin = useNativeFunc("sin");
4957f37794eSStanislav Mekhanoshin   bool native_cos = useNativeFunc("cos");
4967f37794eSStanislav Mekhanoshin 
4977f37794eSStanislav Mekhanoshin   if (native_sin && native_cos) {
4987f37794eSStanislav Mekhanoshin     Module *M = aCI->getModule();
4997f37794eSStanislav Mekhanoshin     Value *opr0 = aCI->getArgOperand(0);
5007f37794eSStanislav Mekhanoshin 
5017f37794eSStanislav Mekhanoshin     AMDGPULibFunc nf;
502fc5121a7SYaxun Liu     nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
503fc5121a7SYaxun Liu     nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
5047f37794eSStanislav Mekhanoshin 
5057f37794eSStanislav Mekhanoshin     nf.setPrefix(AMDGPULibFunc::NATIVE);
5067f37794eSStanislav Mekhanoshin     nf.setId(AMDGPULibFunc::EI_SIN);
50713680223SJames Y Knight     FunctionCallee sinExpr = getFunction(M, nf);
5087f37794eSStanislav Mekhanoshin 
5097f37794eSStanislav Mekhanoshin     nf.setPrefix(AMDGPULibFunc::NATIVE);
5107f37794eSStanislav Mekhanoshin     nf.setId(AMDGPULibFunc::EI_COS);
51113680223SJames Y Knight     FunctionCallee cosExpr = getFunction(M, nf);
5127f37794eSStanislav Mekhanoshin     if (sinExpr && cosExpr) {
5137f37794eSStanislav Mekhanoshin       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
5147f37794eSStanislav Mekhanoshin       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
5157f37794eSStanislav Mekhanoshin       new StoreInst(cosval, aCI->getArgOperand(1), aCI);
5167f37794eSStanislav Mekhanoshin 
5177f37794eSStanislav Mekhanoshin       DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5187f37794eSStanislav Mekhanoshin                                           << " with native version of sin/cos");
5197f37794eSStanislav Mekhanoshin 
5207f37794eSStanislav Mekhanoshin       replaceCall(sinval);
5217f37794eSStanislav Mekhanoshin       return true;
5227f37794eSStanislav Mekhanoshin     }
5237f37794eSStanislav Mekhanoshin   }
5247f37794eSStanislav Mekhanoshin   return false;
5257f37794eSStanislav Mekhanoshin }
5267f37794eSStanislav Mekhanoshin 
5277f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNative(CallInst *aCI) {
5287f37794eSStanislav Mekhanoshin   CI = aCI;
5297f37794eSStanislav Mekhanoshin   Function *Callee = aCI->getCalledFunction();
5307f37794eSStanislav Mekhanoshin 
5317f37794eSStanislav Mekhanoshin   FuncInfo FInfo;
532fc5121a7SYaxun Liu   if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
5337f37794eSStanislav Mekhanoshin       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
534fc5121a7SYaxun Liu       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
5357f37794eSStanislav Mekhanoshin       !(AllNative || useNativeFunc(FInfo.getName()))) {
5367f37794eSStanislav Mekhanoshin     return false;
5377f37794eSStanislav Mekhanoshin   }
5387f37794eSStanislav Mekhanoshin 
5397f37794eSStanislav Mekhanoshin   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5407f37794eSStanislav Mekhanoshin     return sincosUseNative(aCI, FInfo);
5417f37794eSStanislav Mekhanoshin 
5427f37794eSStanislav Mekhanoshin   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
54313680223SJames Y Knight   FunctionCallee F = getFunction(aCI->getModule(), FInfo);
5447f37794eSStanislav Mekhanoshin   if (!F)
5457f37794eSStanislav Mekhanoshin     return false;
5467f37794eSStanislav Mekhanoshin 
5477f37794eSStanislav Mekhanoshin   aCI->setCalledFunction(F);
5487f37794eSStanislav Mekhanoshin   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5497f37794eSStanislav Mekhanoshin                                       << " with native version");
5507f37794eSStanislav Mekhanoshin   return true;
5517f37794eSStanislav Mekhanoshin }
5527f37794eSStanislav Mekhanoshin 
553fc5121a7SYaxun Liu // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
554fc5121a7SYaxun Liu // builtin, with appended type size and alignment arguments, where 2 or 4
555fc5121a7SYaxun Liu // indicates the original number of arguments. The library has optimized version
556fc5121a7SYaxun Liu // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
557fc5121a7SYaxun Liu // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
558fc5121a7SYaxun Liu // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
559fc5121a7SYaxun Liu // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
560fc5121a7SYaxun Liu bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
561fc5121a7SYaxun Liu                                           FuncInfo &FInfo) {
562fc5121a7SYaxun Liu   auto *Callee = CI->getCalledFunction();
563fc5121a7SYaxun Liu   if (!Callee->isDeclaration())
564fc5121a7SYaxun Liu     return false;
565fc5121a7SYaxun Liu 
566fc5121a7SYaxun Liu   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
567fc5121a7SYaxun Liu   auto *M = Callee->getParent();
568fc5121a7SYaxun Liu   auto &Ctx = M->getContext();
569adcd0268SBenjamin Kramer   std::string Name = std::string(Callee->getName());
570fc5121a7SYaxun Liu   auto NumArg = CI->getNumArgOperands();
571fc5121a7SYaxun Liu   if (NumArg != 4 && NumArg != 6)
572fc5121a7SYaxun Liu     return false;
573fc5121a7SYaxun Liu   auto *PacketSize = CI->getArgOperand(NumArg - 2);
574fc5121a7SYaxun Liu   auto *PacketAlign = CI->getArgOperand(NumArg - 1);
575fc5121a7SYaxun Liu   if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
576fc5121a7SYaxun Liu     return false;
577fc5121a7SYaxun Liu   unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
57887e2751cSGuillaume Chatelet   Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
57987e2751cSGuillaume Chatelet   if (Alignment != Size)
580fc5121a7SYaxun Liu     return false;
581fc5121a7SYaxun Liu 
582fc5121a7SYaxun Liu   Type *PtrElemTy;
583fc5121a7SYaxun Liu   if (Size <= 8)
584fc5121a7SYaxun Liu     PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
585fc5121a7SYaxun Liu   else
586aad93654SChristopher Tetreault     PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
587fc5121a7SYaxun Liu   unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
588fc5121a7SYaxun Liu   auto PtrArg = CI->getArgOperand(PtrArgLoc);
589fc5121a7SYaxun Liu   unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
590fc5121a7SYaxun Liu   auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
591fc5121a7SYaxun Liu 
592fc5121a7SYaxun Liu   SmallVector<llvm::Type *, 6> ArgTys;
593fc5121a7SYaxun Liu   for (unsigned I = 0; I != PtrArgLoc; ++I)
594fc5121a7SYaxun Liu     ArgTys.push_back(CI->getArgOperand(I)->getType());
595fc5121a7SYaxun Liu   ArgTys.push_back(PtrTy);
596fc5121a7SYaxun Liu 
597fc5121a7SYaxun Liu   Name = Name + "_" + std::to_string(Size);
598fc5121a7SYaxun Liu   auto *FTy = FunctionType::get(Callee->getReturnType(),
599fc5121a7SYaxun Liu                                 ArrayRef<Type *>(ArgTys), false);
600fc5121a7SYaxun Liu   AMDGPULibFunc NewLibFunc(Name, FTy);
60113680223SJames Y Knight   FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
602fc5121a7SYaxun Liu   if (!F)
603fc5121a7SYaxun Liu     return false;
604fc5121a7SYaxun Liu 
605fc5121a7SYaxun Liu   auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
606fc5121a7SYaxun Liu   SmallVector<Value *, 6> Args;
607fc5121a7SYaxun Liu   for (unsigned I = 0; I != PtrArgLoc; ++I)
608fc5121a7SYaxun Liu     Args.push_back(CI->getArgOperand(I));
609fc5121a7SYaxun Liu   Args.push_back(BCast);
610fc5121a7SYaxun Liu 
611fc5121a7SYaxun Liu   auto *NCI = B.CreateCall(F, Args);
612fc5121a7SYaxun Liu   NCI->setAttributes(CI->getAttributes());
613fc5121a7SYaxun Liu   CI->replaceAllUsesWith(NCI);
614fc5121a7SYaxun Liu   CI->dropAllReferences();
615fc5121a7SYaxun Liu   CI->eraseFromParent();
616fc5121a7SYaxun Liu 
617fc5121a7SYaxun Liu   return true;
618fc5121a7SYaxun Liu }
619fc5121a7SYaxun Liu 
6207f37794eSStanislav Mekhanoshin // This function returns false if no change; return true otherwise.
6217f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
6227f37794eSStanislav Mekhanoshin   this->CI = CI;
6237f37794eSStanislav Mekhanoshin   Function *Callee = CI->getCalledFunction();
6247f37794eSStanislav Mekhanoshin 
6257f37794eSStanislav Mekhanoshin   // Ignore indirect calls.
6267f37794eSStanislav Mekhanoshin   if (Callee == 0) return false;
6277f37794eSStanislav Mekhanoshin 
6287f37794eSStanislav Mekhanoshin   BasicBlock *BB = CI->getParent();
6297f37794eSStanislav Mekhanoshin   LLVMContext &Context = CI->getParent()->getContext();
6307f37794eSStanislav Mekhanoshin   IRBuilder<> B(Context);
6317f37794eSStanislav Mekhanoshin 
6327f37794eSStanislav Mekhanoshin   // Set the builder to the instruction after the call.
6337f37794eSStanislav Mekhanoshin   B.SetInsertPoint(BB, CI->getIterator());
6347f37794eSStanislav Mekhanoshin 
6357f37794eSStanislav Mekhanoshin   // Copy fast flags from the original call.
6367f37794eSStanislav Mekhanoshin   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
6377f37794eSStanislav Mekhanoshin     B.setFastMathFlags(FPOp->getFastMathFlags());
6387f37794eSStanislav Mekhanoshin 
639a9191c84SStanislav Mekhanoshin   switch (Callee->getIntrinsicID()) {
640a9191c84SStanislav Mekhanoshin   default:
641a9191c84SStanislav Mekhanoshin     break;
642a9191c84SStanislav Mekhanoshin   case Intrinsic::amdgcn_wavefrontsize:
643a9191c84SStanislav Mekhanoshin     return !EnablePreLink && fold_wavefrontsize(CI, B);
644a9191c84SStanislav Mekhanoshin   }
645a9191c84SStanislav Mekhanoshin 
646a9191c84SStanislav Mekhanoshin   FuncInfo FInfo;
647a9191c84SStanislav Mekhanoshin   if (!parseFunctionName(Callee->getName(), &FInfo))
648a9191c84SStanislav Mekhanoshin     return false;
649a9191c84SStanislav Mekhanoshin 
650a9191c84SStanislav Mekhanoshin   // Further check the number of arguments to see if they match.
651a9191c84SStanislav Mekhanoshin   if (CI->getNumArgOperands() != FInfo.getNumArgs())
652a9191c84SStanislav Mekhanoshin     return false;
653a9191c84SStanislav Mekhanoshin 
6547f37794eSStanislav Mekhanoshin   if (TDOFold(CI, FInfo))
6557f37794eSStanislav Mekhanoshin     return true;
6567f37794eSStanislav Mekhanoshin 
6577f37794eSStanislav Mekhanoshin   // Under unsafe-math, evaluate calls if possible.
6587f37794eSStanislav Mekhanoshin   // According to Brian Sumner, we can do this for all f32 function calls
6597f37794eSStanislav Mekhanoshin   // using host's double function calls.
6607f37794eSStanislav Mekhanoshin   if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
6617f37794eSStanislav Mekhanoshin     return true;
6627f37794eSStanislav Mekhanoshin 
663*dc6e8dfdSJacob Lambert   // Specialized optimizations for each function call
6647f37794eSStanislav Mekhanoshin   switch (FInfo.getId()) {
6657f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
6667f37794eSStanislav Mekhanoshin     // skip vector function
6677f37794eSStanislav Mekhanoshin     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6687f37794eSStanislav Mekhanoshin              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6697f37794eSStanislav Mekhanoshin             "recip must be an either native or half function");
6707f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
6717f37794eSStanislav Mekhanoshin 
6727f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
6737f37794eSStanislav Mekhanoshin     // skip vector function
6747f37794eSStanislav Mekhanoshin     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6757f37794eSStanislav Mekhanoshin              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6767f37794eSStanislav Mekhanoshin             "divide must be an either native or half function");
6777f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
6787f37794eSStanislav Mekhanoshin 
6797f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POW:
6807f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
6817f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWN:
6827f37794eSStanislav Mekhanoshin     return fold_pow(CI, B, FInfo);
6837f37794eSStanislav Mekhanoshin 
6847f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ROOTN:
6857f37794eSStanislav Mekhanoshin     // skip vector function
6867f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
6877f37794eSStanislav Mekhanoshin 
6887f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_FMA:
6897f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_MAD:
6907f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NFMA:
6917f37794eSStanislav Mekhanoshin     // skip vector function
6927f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
6937f37794eSStanislav Mekhanoshin 
6947f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
6957f37794eSStanislav Mekhanoshin     return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
6967f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
6977f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
6987f37794eSStanislav Mekhanoshin     if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
6997f37794eSStanislav Mekhanoshin          getArgType(FInfo) == AMDGPULibFunc::F64)
7007f37794eSStanislav Mekhanoshin         && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
7017f37794eSStanislav Mekhanoshin       return fold_sincos(CI, B, AA);
7027f37794eSStanislav Mekhanoshin 
7037f37794eSStanislav Mekhanoshin     break;
704fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_READ_PIPE_2:
705fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_READ_PIPE_4:
706fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_WRITE_PIPE_2:
707fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_WRITE_PIPE_4:
708fc5121a7SYaxun Liu     return fold_read_write_pipe(CI, B, FInfo);
7097f37794eSStanislav Mekhanoshin 
7107f37794eSStanislav Mekhanoshin   default:
7117f37794eSStanislav Mekhanoshin     break;
7127f37794eSStanislav Mekhanoshin   }
7137f37794eSStanislav Mekhanoshin 
7147f37794eSStanislav Mekhanoshin   return false;
7157f37794eSStanislav Mekhanoshin }
7167f37794eSStanislav Mekhanoshin 
7177f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
7187f37794eSStanislav Mekhanoshin   // Table-Driven optimization
7197f37794eSStanislav Mekhanoshin   const TableRef tr = getOptTable(FInfo.getId());
7207f37794eSStanislav Mekhanoshin   if (tr.size==0)
7217f37794eSStanislav Mekhanoshin     return false;
7227f37794eSStanislav Mekhanoshin 
7237f37794eSStanislav Mekhanoshin   int const sz = (int)tr.size;
7247f37794eSStanislav Mekhanoshin   const TableEntry * const ftbl = tr.table;
7257f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
7267f37794eSStanislav Mekhanoshin 
7277f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) > 1) {
7287f37794eSStanislav Mekhanoshin     if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
7297f37794eSStanislav Mekhanoshin       SmallVector<double, 0> DVal;
7307f37794eSStanislav Mekhanoshin       for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
7317f37794eSStanislav Mekhanoshin         ConstantFP *eltval = dyn_cast<ConstantFP>(
7327f37794eSStanislav Mekhanoshin                                CV->getElementAsConstant((unsigned)eltNo));
7337f37794eSStanislav Mekhanoshin         assert(eltval && "Non-FP arguments in math function!");
7347f37794eSStanislav Mekhanoshin         bool found = false;
7357f37794eSStanislav Mekhanoshin         for (int i=0; i < sz; ++i) {
7367f37794eSStanislav Mekhanoshin           if (eltval->isExactlyValue(ftbl[i].input)) {
7377f37794eSStanislav Mekhanoshin             DVal.push_back(ftbl[i].result);
7387f37794eSStanislav Mekhanoshin             found = true;
7397f37794eSStanislav Mekhanoshin             break;
7407f37794eSStanislav Mekhanoshin           }
7417f37794eSStanislav Mekhanoshin         }
7427f37794eSStanislav Mekhanoshin         if (!found) {
7437f37794eSStanislav Mekhanoshin           // This vector constants not handled yet.
7447f37794eSStanislav Mekhanoshin           return false;
7457f37794eSStanislav Mekhanoshin         }
7467f37794eSStanislav Mekhanoshin       }
7477f37794eSStanislav Mekhanoshin       LLVMContext &context = CI->getParent()->getParent()->getContext();
7487f37794eSStanislav Mekhanoshin       Constant *nval;
7497f37794eSStanislav Mekhanoshin       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
7507f37794eSStanislav Mekhanoshin         SmallVector<float, 0> FVal;
7517f37794eSStanislav Mekhanoshin         for (unsigned i = 0; i < DVal.size(); ++i) {
7527f37794eSStanislav Mekhanoshin           FVal.push_back((float)DVal[i]);
7537f37794eSStanislav Mekhanoshin         }
7547f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp(FVal);
7557f37794eSStanislav Mekhanoshin         nval = ConstantDataVector::get(context, tmp);
7567f37794eSStanislav Mekhanoshin       } else { // F64
7577f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp(DVal);
7587f37794eSStanislav Mekhanoshin         nval = ConstantDataVector::get(context, tmp);
7597f37794eSStanislav Mekhanoshin       }
760d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7617f37794eSStanislav Mekhanoshin       replaceCall(nval);
7627f37794eSStanislav Mekhanoshin       return true;
7637f37794eSStanislav Mekhanoshin     }
7647f37794eSStanislav Mekhanoshin   } else {
7657f37794eSStanislav Mekhanoshin     // Scalar version
7667f37794eSStanislav Mekhanoshin     if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
7677f37794eSStanislav Mekhanoshin       for (int i = 0; i < sz; ++i) {
7687f37794eSStanislav Mekhanoshin         if (CF->isExactlyValue(ftbl[i].input)) {
7697f37794eSStanislav Mekhanoshin           Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
770d34e60caSNicola Zaghen           LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7717f37794eSStanislav Mekhanoshin           replaceCall(nval);
7727f37794eSStanislav Mekhanoshin           return true;
7737f37794eSStanislav Mekhanoshin         }
7747f37794eSStanislav Mekhanoshin       }
7757f37794eSStanislav Mekhanoshin     }
7767f37794eSStanislav Mekhanoshin   }
7777f37794eSStanislav Mekhanoshin 
7787f37794eSStanislav Mekhanoshin   return false;
7797f37794eSStanislav Mekhanoshin }
7807f37794eSStanislav Mekhanoshin 
7817f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
7827f37794eSStanislav Mekhanoshin   Module *M = CI->getModule();
7837f37794eSStanislav Mekhanoshin   if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
7847f37794eSStanislav Mekhanoshin       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
7857f37794eSStanislav Mekhanoshin       !HasNative(FInfo.getId()))
7867f37794eSStanislav Mekhanoshin     return false;
7877f37794eSStanislav Mekhanoshin 
7887f37794eSStanislav Mekhanoshin   AMDGPULibFunc nf = FInfo;
7897f37794eSStanislav Mekhanoshin   nf.setPrefix(AMDGPULibFunc::NATIVE);
79013680223SJames Y Knight   if (FunctionCallee FPExpr = getFunction(M, nf)) {
791d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
7927f37794eSStanislav Mekhanoshin 
7937f37794eSStanislav Mekhanoshin     CI->setCalledFunction(FPExpr);
7947f37794eSStanislav Mekhanoshin 
795d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << *CI << '\n');
7967f37794eSStanislav Mekhanoshin 
7977f37794eSStanislav Mekhanoshin     return true;
7987f37794eSStanislav Mekhanoshin   }
7997f37794eSStanislav Mekhanoshin   return false;
8007f37794eSStanislav Mekhanoshin }
8017f37794eSStanislav Mekhanoshin 
8027f37794eSStanislav Mekhanoshin //  [native_]half_recip(c) ==> 1.0/c
8037f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
8047f37794eSStanislav Mekhanoshin                                 const FuncInfo &FInfo) {
8057f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
8067f37794eSStanislav Mekhanoshin   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
8077f37794eSStanislav Mekhanoshin     // Just create a normal div. Later, InstCombine will be able
8087f37794eSStanislav Mekhanoshin     // to compute the divide into a constant (avoid check float infinity
8097f37794eSStanislav Mekhanoshin     // or subnormal at this point).
8107f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
8117f37794eSStanislav Mekhanoshin                                opr0,
8127f37794eSStanislav Mekhanoshin                                "recip2div");
813d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8147f37794eSStanislav Mekhanoshin     replaceCall(nval);
8157f37794eSStanislav Mekhanoshin     return true;
8167f37794eSStanislav Mekhanoshin   }
8177f37794eSStanislav Mekhanoshin   return false;
8187f37794eSStanislav Mekhanoshin }
8197f37794eSStanislav Mekhanoshin 
8207f37794eSStanislav Mekhanoshin //  [native_]half_divide(x, c) ==> x/c
8217f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
8227f37794eSStanislav Mekhanoshin                                  const FuncInfo &FInfo) {
8237f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
8247f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
8257f37794eSStanislav Mekhanoshin   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
8267f37794eSStanislav Mekhanoshin   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
8277f37794eSStanislav Mekhanoshin 
8287f37794eSStanislav Mekhanoshin   if ((CF0 && CF1) ||  // both are constants
8297f37794eSStanislav Mekhanoshin       (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
8307f37794eSStanislav Mekhanoshin       // CF1 is constant && f32 divide
8317f37794eSStanislav Mekhanoshin   {
8327f37794eSStanislav Mekhanoshin     Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
8337f37794eSStanislav Mekhanoshin                                 opr1, "__div2recip");
8347f37794eSStanislav Mekhanoshin     Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
8357f37794eSStanislav Mekhanoshin     replaceCall(nval);
8367f37794eSStanislav Mekhanoshin     return true;
8377f37794eSStanislav Mekhanoshin   }
8387f37794eSStanislav Mekhanoshin   return false;
8397f37794eSStanislav Mekhanoshin }
8407f37794eSStanislav Mekhanoshin 
namespace llvm {
// Base-2 logarithm of V.
//
// std::log2 has been part of <cmath> since C++11, so it is used
// unconditionally. Selecting between ::log2 and log(V) / ln2 based on host
// feature-test macros made results depend on the build host, because the
// quotient is not guaranteed to be exactly rounded (for example for exact
// powers of two).
static double log2(double V) {
  return std::log2(V);
}
}
8507f37794eSStanislav Mekhanoshin 
8517f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
8527f37794eSStanislav Mekhanoshin                               const FuncInfo &FInfo) {
8537f37794eSStanislav Mekhanoshin   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
8547f37794eSStanislav Mekhanoshin           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
8557f37794eSStanislav Mekhanoshin           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
8567f37794eSStanislav Mekhanoshin          "fold_pow: encounter a wrong function call");
8577f37794eSStanislav Mekhanoshin 
8587f37794eSStanislav Mekhanoshin   Value *opr0, *opr1;
8597f37794eSStanislav Mekhanoshin   ConstantFP *CF;
8607f37794eSStanislav Mekhanoshin   ConstantInt *CINT;
8617f37794eSStanislav Mekhanoshin   ConstantAggregateZero *CZero;
8627f37794eSStanislav Mekhanoshin   Type *eltType;
8637f37794eSStanislav Mekhanoshin 
8647f37794eSStanislav Mekhanoshin   opr0 = CI->getArgOperand(0);
8657f37794eSStanislav Mekhanoshin   opr1 = CI->getArgOperand(1);
8667f37794eSStanislav Mekhanoshin   CZero = dyn_cast<ConstantAggregateZero>(opr1);
8677f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
8687f37794eSStanislav Mekhanoshin     eltType = opr0->getType();
8697f37794eSStanislav Mekhanoshin     CF = dyn_cast<ConstantFP>(opr1);
8707f37794eSStanislav Mekhanoshin     CINT = dyn_cast<ConstantInt>(opr1);
8717f37794eSStanislav Mekhanoshin   } else {
8727f37794eSStanislav Mekhanoshin     VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
8737f37794eSStanislav Mekhanoshin     assert(VTy && "Oprand of vector function should be of vectortype");
8747f37794eSStanislav Mekhanoshin     eltType = VTy->getElementType();
8757f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
8767f37794eSStanislav Mekhanoshin 
8777f37794eSStanislav Mekhanoshin     // Now, only Handle vector const whose elements have the same value.
8787f37794eSStanislav Mekhanoshin     CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
8797f37794eSStanislav Mekhanoshin     CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
8807f37794eSStanislav Mekhanoshin   }
8817f37794eSStanislav Mekhanoshin 
8827f37794eSStanislav Mekhanoshin   // No unsafe math , no constant argument, do nothing
8837f37794eSStanislav Mekhanoshin   if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
8847f37794eSStanislav Mekhanoshin     return false;
8857f37794eSStanislav Mekhanoshin 
8867f37794eSStanislav Mekhanoshin   // 0x1111111 means that we don't do anything for this call.
8877f37794eSStanislav Mekhanoshin   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
8887f37794eSStanislav Mekhanoshin 
8897f37794eSStanislav Mekhanoshin   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
8907f37794eSStanislav Mekhanoshin     //  pow/powr/pown(x, 0) == 1
891d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
8927f37794eSStanislav Mekhanoshin     Constant *cnval = ConstantFP::get(eltType, 1.0);
8937f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) > 1) {
8947f37794eSStanislav Mekhanoshin       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
8957f37794eSStanislav Mekhanoshin     }
8967f37794eSStanislav Mekhanoshin     replaceCall(cnval);
8977f37794eSStanislav Mekhanoshin     return true;
8987f37794eSStanislav Mekhanoshin   }
8997f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
9007f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, 1.0) = x
901d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
9027f37794eSStanislav Mekhanoshin     replaceCall(opr0);
9037f37794eSStanislav Mekhanoshin     return true;
9047f37794eSStanislav Mekhanoshin   }
9057f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
9067f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, 2.0) = x*x
907d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
908d34e60caSNicola Zaghen                       << "\n");
9097f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
9107f37794eSStanislav Mekhanoshin     replaceCall(nval);
9117f37794eSStanislav Mekhanoshin     return true;
9127f37794eSStanislav Mekhanoshin   }
9137f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
9147f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, -1.0) = 1.0/x
915d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
9167f37794eSStanislav Mekhanoshin     Constant *cnval = ConstantFP::get(eltType, 1.0);
9177f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) > 1) {
9187f37794eSStanislav Mekhanoshin       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9197f37794eSStanislav Mekhanoshin     }
9207f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
9217f37794eSStanislav Mekhanoshin     replaceCall(nval);
9227f37794eSStanislav Mekhanoshin     return true;
9237f37794eSStanislav Mekhanoshin   }
9247f37794eSStanislav Mekhanoshin 
9257f37794eSStanislav Mekhanoshin   Module *M = CI->getModule();
9267f37794eSStanislav Mekhanoshin   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
9277f37794eSStanislav Mekhanoshin     // pow[r](x, [-]0.5) = sqrt(x)
9287f37794eSStanislav Mekhanoshin     bool issqrt = CF->isExactlyValue(0.5);
92913680223SJames Y Knight     if (FunctionCallee FPExpr =
93013680223SJames Y Knight             getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
93113680223SJames Y Knight                                                 : AMDGPULibFunc::EI_RSQRT,
93213680223SJames Y Knight                                          FInfo))) {
933d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
9347f37794eSStanislav Mekhanoshin                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
9357f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
9367f37794eSStanislav Mekhanoshin                                                         : "__pow2rsqrt");
9377f37794eSStanislav Mekhanoshin       replaceCall(nval);
9387f37794eSStanislav Mekhanoshin       return true;
9397f37794eSStanislav Mekhanoshin     }
9407f37794eSStanislav Mekhanoshin   }
9417f37794eSStanislav Mekhanoshin 
9427f37794eSStanislav Mekhanoshin   if (!isUnsafeMath(CI))
9437f37794eSStanislav Mekhanoshin     return false;
9447f37794eSStanislav Mekhanoshin 
9457f37794eSStanislav Mekhanoshin   // Unsafe Math optimization
9467f37794eSStanislav Mekhanoshin 
9477f37794eSStanislav Mekhanoshin   // Remember that ci_opr1 is set if opr1 is integral
9487f37794eSStanislav Mekhanoshin   if (CF) {
9497f37794eSStanislav Mekhanoshin     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9507f37794eSStanislav Mekhanoshin                     ? (double)CF->getValueAPF().convertToFloat()
9517f37794eSStanislav Mekhanoshin                     : CF->getValueAPF().convertToDouble();
9527f37794eSStanislav Mekhanoshin     int ival = (int)dval;
9537f37794eSStanislav Mekhanoshin     if ((double)ival == dval) {
9547f37794eSStanislav Mekhanoshin       ci_opr1 = ival;
9557f37794eSStanislav Mekhanoshin     } else
9567f37794eSStanislav Mekhanoshin       ci_opr1 = 0x11111111;
9577f37794eSStanislav Mekhanoshin   }
9587f37794eSStanislav Mekhanoshin 
9597f37794eSStanislav Mekhanoshin   // pow/powr/pown(x, c) = [1/](x*x*..x); where
9607f37794eSStanislav Mekhanoshin   //   trunc(c) == c && the number of x == c && |c| <= 12
9617f37794eSStanislav Mekhanoshin   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9627f37794eSStanislav Mekhanoshin   if (abs_opr1 <= 12) {
9637f37794eSStanislav Mekhanoshin     Constant *cnval;
9647f37794eSStanislav Mekhanoshin     Value *nval;
9657f37794eSStanislav Mekhanoshin     if (abs_opr1 == 0) {
9667f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, 1.0);
9677f37794eSStanislav Mekhanoshin       if (getVecSize(FInfo) > 1) {
9687f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9697f37794eSStanislav Mekhanoshin       }
9707f37794eSStanislav Mekhanoshin       nval = cnval;
9717f37794eSStanislav Mekhanoshin     } else {
9727f37794eSStanislav Mekhanoshin       Value *valx2 = nullptr;
9737f37794eSStanislav Mekhanoshin       nval = nullptr;
9747f37794eSStanislav Mekhanoshin       while (abs_opr1 > 0) {
9757f37794eSStanislav Mekhanoshin         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
9767f37794eSStanislav Mekhanoshin         if (abs_opr1 & 1) {
9777f37794eSStanislav Mekhanoshin           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
9787f37794eSStanislav Mekhanoshin         }
9797f37794eSStanislav Mekhanoshin         abs_opr1 >>= 1;
9807f37794eSStanislav Mekhanoshin       }
9817f37794eSStanislav Mekhanoshin     }
9827f37794eSStanislav Mekhanoshin 
9837f37794eSStanislav Mekhanoshin     if (ci_opr1 < 0) {
9847f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, 1.0);
9857f37794eSStanislav Mekhanoshin       if (getVecSize(FInfo) > 1) {
9867f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9877f37794eSStanislav Mekhanoshin       }
9887f37794eSStanislav Mekhanoshin       nval = B.CreateFDiv(cnval, nval, "__1powprod");
9897f37794eSStanislav Mekhanoshin     }
990d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
991d34e60caSNicola Zaghen                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
992d34e60caSNicola Zaghen                       << ")\n");
9937f37794eSStanislav Mekhanoshin     replaceCall(nval);
9947f37794eSStanislav Mekhanoshin     return true;
9957f37794eSStanislav Mekhanoshin   }
9967f37794eSStanislav Mekhanoshin 
9977f37794eSStanislav Mekhanoshin   // powr ---> exp2(y * log2(x))
9987f37794eSStanislav Mekhanoshin   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
99913680223SJames Y Knight   FunctionCallee ExpExpr =
100013680223SJames Y Knight       getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
10017f37794eSStanislav Mekhanoshin   if (!ExpExpr)
10027f37794eSStanislav Mekhanoshin     return false;
10037f37794eSStanislav Mekhanoshin 
10047f37794eSStanislav Mekhanoshin   bool needlog = false;
10057f37794eSStanislav Mekhanoshin   bool needabs = false;
10067f37794eSStanislav Mekhanoshin   bool needcopysign = false;
10077f37794eSStanislav Mekhanoshin   Constant *cnval = nullptr;
10087f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
10097f37794eSStanislav Mekhanoshin     CF = dyn_cast<ConstantFP>(opr0);
10107f37794eSStanislav Mekhanoshin 
10117f37794eSStanislav Mekhanoshin     if (CF) {
10127f37794eSStanislav Mekhanoshin       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10137f37794eSStanislav Mekhanoshin                    ? (double)CF->getValueAPF().convertToFloat()
10147f37794eSStanislav Mekhanoshin                    : CF->getValueAPF().convertToDouble();
10157f37794eSStanislav Mekhanoshin 
10167f37794eSStanislav Mekhanoshin       V = log2(std::abs(V));
10177f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, V);
10187f37794eSStanislav Mekhanoshin       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
10197f37794eSStanislav Mekhanoshin                      CF->isNegative();
10207f37794eSStanislav Mekhanoshin     } else {
10217f37794eSStanislav Mekhanoshin       needlog = true;
10227f37794eSStanislav Mekhanoshin       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
10237f37794eSStanislav Mekhanoshin                                (!CF || CF->isNegative());
10247f37794eSStanislav Mekhanoshin     }
10257f37794eSStanislav Mekhanoshin   } else {
10267f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
10277f37794eSStanislav Mekhanoshin 
10287f37794eSStanislav Mekhanoshin     if (!CDV) {
10297f37794eSStanislav Mekhanoshin       needlog = true;
10307f37794eSStanislav Mekhanoshin       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10317f37794eSStanislav Mekhanoshin     } else {
10327f37794eSStanislav Mekhanoshin       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
10337f37794eSStanislav Mekhanoshin               "Wrong vector size detected");
10347f37794eSStanislav Mekhanoshin 
10357f37794eSStanislav Mekhanoshin       SmallVector<double, 0> DVal;
10367f37794eSStanislav Mekhanoshin       for (int i=0; i < getVecSize(FInfo); ++i) {
10377f37794eSStanislav Mekhanoshin         double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10387f37794eSStanislav Mekhanoshin                      ? (double)CDV->getElementAsFloat(i)
10397f37794eSStanislav Mekhanoshin                      : CDV->getElementAsDouble(i);
10407f37794eSStanislav Mekhanoshin         if (V < 0.0) needcopysign = true;
10417f37794eSStanislav Mekhanoshin         V = log2(std::abs(V));
10427f37794eSStanislav Mekhanoshin         DVal.push_back(V);
10437f37794eSStanislav Mekhanoshin       }
10447f37794eSStanislav Mekhanoshin       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
10457f37794eSStanislav Mekhanoshin         SmallVector<float, 0> FVal;
10467f37794eSStanislav Mekhanoshin         for (unsigned i=0; i < DVal.size(); ++i) {
10477f37794eSStanislav Mekhanoshin           FVal.push_back((float)DVal[i]);
10487f37794eSStanislav Mekhanoshin         }
10497f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp(FVal);
10507f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::get(M->getContext(), tmp);
10517f37794eSStanislav Mekhanoshin       } else {
10527f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp(DVal);
10537f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::get(M->getContext(), tmp);
10547f37794eSStanislav Mekhanoshin       }
10557f37794eSStanislav Mekhanoshin     }
10567f37794eSStanislav Mekhanoshin   }
10577f37794eSStanislav Mekhanoshin 
10587f37794eSStanislav Mekhanoshin   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10597f37794eSStanislav Mekhanoshin     // We cannot handle corner cases for a general pow() function, give up
10607f37794eSStanislav Mekhanoshin     // unless y is a constant integral value. Then proceed as if it were pown.
10617f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) == 1) {
10627f37794eSStanislav Mekhanoshin       if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
10637f37794eSStanislav Mekhanoshin         double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10647f37794eSStanislav Mekhanoshin                    ? (double)CF->getValueAPF().convertToFloat()
10657f37794eSStanislav Mekhanoshin                    : CF->getValueAPF().convertToDouble();
10667f37794eSStanislav Mekhanoshin         if (y != (double)(int64_t)y)
10677f37794eSStanislav Mekhanoshin           return false;
10687f37794eSStanislav Mekhanoshin       } else
10697f37794eSStanislav Mekhanoshin         return false;
10707f37794eSStanislav Mekhanoshin     } else {
10717f37794eSStanislav Mekhanoshin       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
10727f37794eSStanislav Mekhanoshin         for (int i=0; i < getVecSize(FInfo); ++i) {
10737f37794eSStanislav Mekhanoshin           double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10747f37794eSStanislav Mekhanoshin                      ? (double)CDV->getElementAsFloat(i)
10757f37794eSStanislav Mekhanoshin                      : CDV->getElementAsDouble(i);
10767f37794eSStanislav Mekhanoshin           if (y != (double)(int64_t)y)
10777f37794eSStanislav Mekhanoshin             return false;
10787f37794eSStanislav Mekhanoshin         }
10797f37794eSStanislav Mekhanoshin       } else
10807f37794eSStanislav Mekhanoshin         return false;
10817f37794eSStanislav Mekhanoshin     }
10827f37794eSStanislav Mekhanoshin   }
10837f37794eSStanislav Mekhanoshin 
10847f37794eSStanislav Mekhanoshin   Value *nval;
10857f37794eSStanislav Mekhanoshin   if (needabs) {
108613680223SJames Y Knight     FunctionCallee AbsExpr =
108713680223SJames Y Knight         getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
10887f37794eSStanislav Mekhanoshin     if (!AbsExpr)
10897f37794eSStanislav Mekhanoshin       return false;
10907f37794eSStanislav Mekhanoshin     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
10917f37794eSStanislav Mekhanoshin   } else {
10927f37794eSStanislav Mekhanoshin     nval = cnval ? cnval : opr0;
10937f37794eSStanislav Mekhanoshin   }
10947f37794eSStanislav Mekhanoshin   if (needlog) {
109513680223SJames Y Knight     FunctionCallee LogExpr =
109613680223SJames Y Knight         getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
10977f37794eSStanislav Mekhanoshin     if (!LogExpr)
10987f37794eSStanislav Mekhanoshin       return false;
10997f37794eSStanislav Mekhanoshin     nval = CreateCallEx(B,LogExpr, nval, "__log2");
11007f37794eSStanislav Mekhanoshin   }
11017f37794eSStanislav Mekhanoshin 
11027f37794eSStanislav Mekhanoshin   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
11037f37794eSStanislav Mekhanoshin     // convert int(32) to fp(f32 or f64)
11047f37794eSStanislav Mekhanoshin     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
11057f37794eSStanislav Mekhanoshin   }
11067f37794eSStanislav Mekhanoshin   nval = B.CreateFMul(opr1, nval, "__ylogx");
11077f37794eSStanislav Mekhanoshin   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11087f37794eSStanislav Mekhanoshin 
11097f37794eSStanislav Mekhanoshin   if (needcopysign) {
11107f37794eSStanislav Mekhanoshin     Value *opr_n;
11117f37794eSStanislav Mekhanoshin     Type* rTy = opr0->getType();
11127f37794eSStanislav Mekhanoshin     Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
11137f37794eSStanislav Mekhanoshin     Type *nTy = nTyS;
11143254a001SChristopher Tetreault     if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
11153254a001SChristopher Tetreault       nTy = FixedVectorType::get(nTyS, vTy);
11167f37794eSStanislav Mekhanoshin     unsigned size = nTy->getScalarSizeInBits();
11177f37794eSStanislav Mekhanoshin     opr_n = CI->getArgOperand(1);
11187f37794eSStanislav Mekhanoshin     if (opr_n->getType()->isIntegerTy())
11197f37794eSStanislav Mekhanoshin       opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
11207f37794eSStanislav Mekhanoshin     else
11217f37794eSStanislav Mekhanoshin       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11227f37794eSStanislav Mekhanoshin 
11237f37794eSStanislav Mekhanoshin     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11247f37794eSStanislav Mekhanoshin     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
11257f37794eSStanislav Mekhanoshin     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
11267f37794eSStanislav Mekhanoshin     nval = B.CreateBitCast(nval, opr0->getType());
11277f37794eSStanislav Mekhanoshin   }
11287f37794eSStanislav Mekhanoshin 
1129d34e60caSNicola Zaghen   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
11307f37794eSStanislav Mekhanoshin                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
11317f37794eSStanislav Mekhanoshin   replaceCall(nval);
11327f37794eSStanislav Mekhanoshin 
11337f37794eSStanislav Mekhanoshin   return true;
11347f37794eSStanislav Mekhanoshin }
11357f37794eSStanislav Mekhanoshin 
11367f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
11377f37794eSStanislav Mekhanoshin                                 const FuncInfo &FInfo) {
11387f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
11397f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
11407f37794eSStanislav Mekhanoshin 
11417f37794eSStanislav Mekhanoshin   ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
11427f37794eSStanislav Mekhanoshin   if (!CINT) {
11437f37794eSStanislav Mekhanoshin     return false;
11447f37794eSStanislav Mekhanoshin   }
11457f37794eSStanislav Mekhanoshin   int ci_opr1 = (int)CINT->getSExtValue();
11467f37794eSStanislav Mekhanoshin   if (ci_opr1 == 1) {  // rootn(x, 1) = x
1147d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
11487f37794eSStanislav Mekhanoshin     replaceCall(opr0);
11497f37794eSStanislav Mekhanoshin     return true;
11507f37794eSStanislav Mekhanoshin   }
11517f37794eSStanislav Mekhanoshin   if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
11527f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
115313680223SJames Y Knight     if (FunctionCallee FPExpr =
115413680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1155d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
11567f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
11577f37794eSStanislav Mekhanoshin       replaceCall(nval);
11587f37794eSStanislav Mekhanoshin       return true;
11597f37794eSStanislav Mekhanoshin     }
11607f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11617f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
116213680223SJames Y Knight     if (FunctionCallee FPExpr =
116313680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1164d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
11657f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
11667f37794eSStanislav Mekhanoshin       replaceCall(nval);
11677f37794eSStanislav Mekhanoshin       return true;
11687f37794eSStanislav Mekhanoshin     }
11697f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1170d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
11717f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
11727f37794eSStanislav Mekhanoshin                                opr0,
11737f37794eSStanislav Mekhanoshin                                "__rootn2div");
11747f37794eSStanislav Mekhanoshin     replaceCall(nval);
11757f37794eSStanislav Mekhanoshin     return true;
11767f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
11777f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
117813680223SJames Y Knight     if (FunctionCallee FPExpr =
117913680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1180d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1181d34e60caSNicola Zaghen                         << ")\n");
11827f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
11837f37794eSStanislav Mekhanoshin       replaceCall(nval);
11847f37794eSStanislav Mekhanoshin       return true;
11857f37794eSStanislav Mekhanoshin     }
11867f37794eSStanislav Mekhanoshin   }
11877f37794eSStanislav Mekhanoshin   return false;
11887f37794eSStanislav Mekhanoshin }
11897f37794eSStanislav Mekhanoshin 
11907f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
11917f37794eSStanislav Mekhanoshin                                   const FuncInfo &FInfo) {
11927f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
11937f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
11947f37794eSStanislav Mekhanoshin   Value *opr2 = CI->getArgOperand(2);
11957f37794eSStanislav Mekhanoshin 
11967f37794eSStanislav Mekhanoshin   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
11977f37794eSStanislav Mekhanoshin   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
11987f37794eSStanislav Mekhanoshin   if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
11997f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = c if a=0 || b=0
1200d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
12017f37794eSStanislav Mekhanoshin     replaceCall(opr2);
12027f37794eSStanislav Mekhanoshin     return true;
12037f37794eSStanislav Mekhanoshin   }
12047f37794eSStanislav Mekhanoshin   if (CF0 && CF0->isExactlyValue(1.0f)) {
12057f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = b+c if a=1
1206d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1207d34e60caSNicola Zaghen                       << "\n");
12087f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
12097f37794eSStanislav Mekhanoshin     replaceCall(nval);
12107f37794eSStanislav Mekhanoshin     return true;
12117f37794eSStanislav Mekhanoshin   }
12127f37794eSStanislav Mekhanoshin   if (CF1 && CF1->isExactlyValue(1.0f)) {
12137f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = a+c if b=1
1214d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1215d34e60caSNicola Zaghen                       << "\n");
12167f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
12177f37794eSStanislav Mekhanoshin     replaceCall(nval);
12187f37794eSStanislav Mekhanoshin     return true;
12197f37794eSStanislav Mekhanoshin   }
12207f37794eSStanislav Mekhanoshin   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
12217f37794eSStanislav Mekhanoshin     if (CF->isZero()) {
12227f37794eSStanislav Mekhanoshin       // fma/mad(a, b, c) = a*b if c=0
1223d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1224d34e60caSNicola Zaghen                         << *opr1 << "\n");
12257f37794eSStanislav Mekhanoshin       Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
12267f37794eSStanislav Mekhanoshin       replaceCall(nval);
12277f37794eSStanislav Mekhanoshin       return true;
12287f37794eSStanislav Mekhanoshin     }
12297f37794eSStanislav Mekhanoshin   }
12307f37794eSStanislav Mekhanoshin 
12317f37794eSStanislav Mekhanoshin   return false;
12327f37794eSStanislav Mekhanoshin }
12337f37794eSStanislav Mekhanoshin 
1234*dc6e8dfdSJacob Lambert // Get a scalar native builtin single argument FP function
123513680223SJames Y Knight FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
123613680223SJames Y Knight                                                  const FuncInfo &FInfo) {
1237312c557bSStanislav Mekhanoshin   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1238312c557bSStanislav Mekhanoshin     return nullptr;
12397f37794eSStanislav Mekhanoshin   FuncInfo nf = FInfo;
12407f37794eSStanislav Mekhanoshin   nf.setPrefix(AMDGPULibFunc::NATIVE);
12417f37794eSStanislav Mekhanoshin   return getFunction(M, nf);
12427f37794eSStanislav Mekhanoshin }
12437f37794eSStanislav Mekhanoshin 
12447f37794eSStanislav Mekhanoshin // fold sqrt -> native_sqrt (x)
12457f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
12467f37794eSStanislav Mekhanoshin                                const FuncInfo &FInfo) {
1247312c557bSStanislav Mekhanoshin   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
12487f37794eSStanislav Mekhanoshin       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
124913680223SJames Y Knight     if (FunctionCallee FPExpr = getNativeFunction(
12507f37794eSStanislav Mekhanoshin             CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
12517f37794eSStanislav Mekhanoshin       Value *opr0 = CI->getArgOperand(0);
1252d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
12537f37794eSStanislav Mekhanoshin                         << "sqrt(" << *opr0 << ")\n");
12547f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
12557f37794eSStanislav Mekhanoshin       replaceCall(nval);
12567f37794eSStanislav Mekhanoshin       return true;
12577f37794eSStanislav Mekhanoshin     }
12587f37794eSStanislav Mekhanoshin   }
12597f37794eSStanislav Mekhanoshin   return false;
12607f37794eSStanislav Mekhanoshin }
12617f37794eSStanislav Mekhanoshin 
12627f37794eSStanislav Mekhanoshin // fold sin, cos -> sincos.
12637f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
12647f37794eSStanislav Mekhanoshin                                  AliasAnalysis *AA) {
12657f37794eSStanislav Mekhanoshin   AMDGPULibFunc fInfo;
12667f37794eSStanislav Mekhanoshin   if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
12677f37794eSStanislav Mekhanoshin     return false;
12687f37794eSStanislav Mekhanoshin 
12697f37794eSStanislav Mekhanoshin   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
12707f37794eSStanislav Mekhanoshin          fInfo.getId() == AMDGPULibFunc::EI_COS);
12717f37794eSStanislav Mekhanoshin   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
12727f37794eSStanislav Mekhanoshin 
12737f37794eSStanislav Mekhanoshin   Value *CArgVal = CI->getArgOperand(0);
12747f37794eSStanislav Mekhanoshin   BasicBlock * const CBB = CI->getParent();
12757f37794eSStanislav Mekhanoshin 
12767f37794eSStanislav Mekhanoshin   int const MaxScan = 30;
1277c9821cecSStanislav Mekhanoshin   bool Changed = false;
12787f37794eSStanislav Mekhanoshin 
12797f37794eSStanislav Mekhanoshin   { // fold in load value.
12807f37794eSStanislav Mekhanoshin     LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
12817f37794eSStanislav Mekhanoshin     if (LI && LI->getParent() == CBB) {
12827f37794eSStanislav Mekhanoshin       BasicBlock::iterator BBI = LI->getIterator();
12837f37794eSStanislav Mekhanoshin       Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
12847f37794eSStanislav Mekhanoshin       if (AvailableVal) {
1285c9821cecSStanislav Mekhanoshin         Changed = true;
12867f37794eSStanislav Mekhanoshin         CArgVal->replaceAllUsesWith(AvailableVal);
12877f37794eSStanislav Mekhanoshin         if (CArgVal->getNumUses() == 0)
12887f37794eSStanislav Mekhanoshin           LI->eraseFromParent();
12897f37794eSStanislav Mekhanoshin         CArgVal = CI->getArgOperand(0);
12907f37794eSStanislav Mekhanoshin       }
12917f37794eSStanislav Mekhanoshin     }
12927f37794eSStanislav Mekhanoshin   }
12937f37794eSStanislav Mekhanoshin 
12947f37794eSStanislav Mekhanoshin   Module *M = CI->getModule();
12957f37794eSStanislav Mekhanoshin   fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
12967f37794eSStanislav Mekhanoshin   std::string const PairName = fInfo.mangle();
12977f37794eSStanislav Mekhanoshin 
12987f37794eSStanislav Mekhanoshin   CallInst *UI = nullptr;
12997f37794eSStanislav Mekhanoshin   for (User* U : CArgVal->users()) {
13007f37794eSStanislav Mekhanoshin     CallInst *XI = dyn_cast_or_null<CallInst>(U);
13017f37794eSStanislav Mekhanoshin     if (!XI || XI == CI || XI->getParent() != CBB)
13027f37794eSStanislav Mekhanoshin       continue;
13037f37794eSStanislav Mekhanoshin 
13047f37794eSStanislav Mekhanoshin     Function *UCallee = XI->getCalledFunction();
13057f37794eSStanislav Mekhanoshin     if (!UCallee || !UCallee->getName().equals(PairName))
13067f37794eSStanislav Mekhanoshin       continue;
13077f37794eSStanislav Mekhanoshin 
13087f37794eSStanislav Mekhanoshin     BasicBlock::iterator BBI = CI->getIterator();
13097f37794eSStanislav Mekhanoshin     if (BBI == CI->getParent()->begin())
13107f37794eSStanislav Mekhanoshin       break;
13117f37794eSStanislav Mekhanoshin     --BBI;
13127f37794eSStanislav Mekhanoshin     for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
13137f37794eSStanislav Mekhanoshin       if (cast<Instruction>(BBI) == XI) {
13147f37794eSStanislav Mekhanoshin         UI = XI;
13157f37794eSStanislav Mekhanoshin         break;
13167f37794eSStanislav Mekhanoshin       }
13177f37794eSStanislav Mekhanoshin     }
13187f37794eSStanislav Mekhanoshin     if (UI) break;
13197f37794eSStanislav Mekhanoshin   }
13207f37794eSStanislav Mekhanoshin 
1321c9821cecSStanislav Mekhanoshin   if (!UI)
1322c9821cecSStanislav Mekhanoshin     return Changed;
13237f37794eSStanislav Mekhanoshin 
13247f37794eSStanislav Mekhanoshin   // Merge the sin and cos.
13257f37794eSStanislav Mekhanoshin 
13267f37794eSStanislav Mekhanoshin   // for OpenCL 2.0 we have only generic implementation of sincos
13277f37794eSStanislav Mekhanoshin   // function.
13287f37794eSStanislav Mekhanoshin   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
13290da6350dSMatt Arsenault   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
133013680223SJames Y Knight   FunctionCallee Fsincos = getFunction(M, nf);
1331c9821cecSStanislav Mekhanoshin   if (!Fsincos)
1332c9821cecSStanislav Mekhanoshin     return Changed;
13337f37794eSStanislav Mekhanoshin 
13347f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItOld = B.GetInsertPoint();
13357f37794eSStanislav Mekhanoshin   AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
13367f37794eSStanislav Mekhanoshin   B.SetInsertPoint(UI);
13377f37794eSStanislav Mekhanoshin 
13387f37794eSStanislav Mekhanoshin   Value *P = Alloc;
133913680223SJames Y Knight   Type *PTy = Fsincos.getFunctionType()->getParamType(1);
13407f37794eSStanislav Mekhanoshin   // The allocaInst allocates the memory in private address space. This need
13417f37794eSStanislav Mekhanoshin   // to be bitcasted to point to the address space of cos pointer type.
13427f37794eSStanislav Mekhanoshin   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
13430da6350dSMatt Arsenault   if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
13447f37794eSStanislav Mekhanoshin     P = B.CreateAddrSpaceCast(Alloc, PTy);
13457f37794eSStanislav Mekhanoshin   CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
13467f37794eSStanislav Mekhanoshin 
1347d34e60caSNicola Zaghen   LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1348d34e60caSNicola Zaghen                     << *Call << "\n");
13497f37794eSStanislav Mekhanoshin 
13507f37794eSStanislav Mekhanoshin   if (!isSin) { // CI->cos, UI->sin
13517f37794eSStanislav Mekhanoshin     B.SetInsertPoint(&*ItOld);
13527f37794eSStanislav Mekhanoshin     UI->replaceAllUsesWith(&*Call);
135314359ef1SJames Y Knight     Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13547f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(Reload);
13557f37794eSStanislav Mekhanoshin     UI->eraseFromParent();
13567f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
13577f37794eSStanislav Mekhanoshin   } else { // CI->sin, UI->cos
135814359ef1SJames Y Knight     Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13597f37794eSStanislav Mekhanoshin     UI->replaceAllUsesWith(Reload);
13607f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(Call);
13617f37794eSStanislav Mekhanoshin     UI->eraseFromParent();
13627f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
13637f37794eSStanislav Mekhanoshin   }
13647f37794eSStanislav Mekhanoshin   return true;
13657f37794eSStanislav Mekhanoshin }
13667f37794eSStanislav Mekhanoshin 
1367a9191c84SStanislav Mekhanoshin bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1368a9191c84SStanislav Mekhanoshin   if (!TM)
1369a9191c84SStanislav Mekhanoshin     return false;
1370a9191c84SStanislav Mekhanoshin 
1371a9191c84SStanislav Mekhanoshin   StringRef CPU = TM->getTargetCPU();
1372a9191c84SStanislav Mekhanoshin   StringRef Features = TM->getTargetFeatureString();
137342f74e82SMartin Storsjö   if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1374a9191c84SStanislav Mekhanoshin       (Features.empty() ||
137542f74e82SMartin Storsjö        Features.find_insensitive("wavefrontsize") == StringRef::npos))
1376a9191c84SStanislav Mekhanoshin     return false;
1377a9191c84SStanislav Mekhanoshin 
1378a9191c84SStanislav Mekhanoshin   Function *F = CI->getParent()->getParent();
1379a9191c84SStanislav Mekhanoshin   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1380a9191c84SStanislav Mekhanoshin   unsigned N = ST.getWavefrontSize();
1381a9191c84SStanislav Mekhanoshin 
1382a9191c84SStanislav Mekhanoshin   LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1383a9191c84SStanislav Mekhanoshin                << N << "\n");
1384a9191c84SStanislav Mekhanoshin 
1385a9191c84SStanislav Mekhanoshin   CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1386a9191c84SStanislav Mekhanoshin   CI->eraseFromParent();
1387a9191c84SStanislav Mekhanoshin   return true;
1388a9191c84SStanislav Mekhanoshin }
1389a9191c84SStanislav Mekhanoshin 
13907f37794eSStanislav Mekhanoshin // Get insertion point at entry.
13917f37794eSStanislav Mekhanoshin BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
13927f37794eSStanislav Mekhanoshin   Function * Func = UI->getParent()->getParent();
13937f37794eSStanislav Mekhanoshin   BasicBlock * BB = &Func->getEntryBlock();
13947f37794eSStanislav Mekhanoshin   assert(BB && "Entry block not found!");
13957f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItNew = BB->begin();
13967f37794eSStanislav Mekhanoshin   return ItNew;
13977f37794eSStanislav Mekhanoshin }
13987f37794eSStanislav Mekhanoshin 
13997f37794eSStanislav Mekhanoshin // Insert a AllocsInst at the beginning of function entry block.
14007f37794eSStanislav Mekhanoshin AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
14017f37794eSStanislav Mekhanoshin                                          const char *prefix) {
14027f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItNew = getEntryIns(UI);
14037f37794eSStanislav Mekhanoshin   Function *UCallee = UI->getCalledFunction();
14047f37794eSStanislav Mekhanoshin   Type *RetType = UCallee->getReturnType();
14057f37794eSStanislav Mekhanoshin   B.SetInsertPoint(&*ItNew);
14067f37794eSStanislav Mekhanoshin   AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
14077f37794eSStanislav Mekhanoshin     std::string(prefix) + UI->getName());
14084f04db4bSEli Friedman   Alloc->setAlignment(
14094f04db4bSEli Friedman       Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
14107f37794eSStanislav Mekhanoshin   return Alloc;
14117f37794eSStanislav Mekhanoshin }
14127f37794eSStanislav Mekhanoshin 
14137f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
14147f37794eSStanislav Mekhanoshin                                             double& Res0, double& Res1,
14157f37794eSStanislav Mekhanoshin                                             Constant *copr0, Constant *copr1,
14167f37794eSStanislav Mekhanoshin                                             Constant *copr2) {
  // By default, opr0/opr1/opr2 hold values of float/double type.
  // If they are not float/double, each function has to handle its
  // operands separately.
14207f37794eSStanislav Mekhanoshin   double opr0=0.0, opr1=0.0, opr2=0.0;
14217f37794eSStanislav Mekhanoshin   ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
14227f37794eSStanislav Mekhanoshin   ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
14237f37794eSStanislav Mekhanoshin   ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
14247f37794eSStanislav Mekhanoshin   if (fpopr0) {
14257f37794eSStanislav Mekhanoshin     opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14267f37794eSStanislav Mekhanoshin              ? fpopr0->getValueAPF().convertToDouble()
14277f37794eSStanislav Mekhanoshin              : (double)fpopr0->getValueAPF().convertToFloat();
14287f37794eSStanislav Mekhanoshin   }
14297f37794eSStanislav Mekhanoshin 
14307f37794eSStanislav Mekhanoshin   if (fpopr1) {
14317f37794eSStanislav Mekhanoshin     opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14327f37794eSStanislav Mekhanoshin              ? fpopr1->getValueAPF().convertToDouble()
14337f37794eSStanislav Mekhanoshin              : (double)fpopr1->getValueAPF().convertToFloat();
14347f37794eSStanislav Mekhanoshin   }
14357f37794eSStanislav Mekhanoshin 
14367f37794eSStanislav Mekhanoshin   if (fpopr2) {
14377f37794eSStanislav Mekhanoshin     opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14387f37794eSStanislav Mekhanoshin              ? fpopr2->getValueAPF().convertToDouble()
14397f37794eSStanislav Mekhanoshin              : (double)fpopr2->getValueAPF().convertToFloat();
14407f37794eSStanislav Mekhanoshin   }
14417f37794eSStanislav Mekhanoshin 
14427f37794eSStanislav Mekhanoshin   switch (FInfo.getId()) {
14437f37794eSStanislav Mekhanoshin   default : return false;
14447f37794eSStanislav Mekhanoshin 
14457f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOS:
14467f37794eSStanislav Mekhanoshin     Res0 = acos(opr0);
14477f37794eSStanislav Mekhanoshin     return true;
14487f37794eSStanislav Mekhanoshin 
14497f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSH:
14507f37794eSStanislav Mekhanoshin     // acosh(x) == log(x + sqrt(x*x - 1))
14517f37794eSStanislav Mekhanoshin     Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14527f37794eSStanislav Mekhanoshin     return true;
14537f37794eSStanislav Mekhanoshin 
14547f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSPI:
14557f37794eSStanislav Mekhanoshin     Res0 = acos(opr0) / MATH_PI;
14567f37794eSStanislav Mekhanoshin     return true;
14577f37794eSStanislav Mekhanoshin 
14587f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASIN:
14597f37794eSStanislav Mekhanoshin     Res0 = asin(opr0);
14607f37794eSStanislav Mekhanoshin     return true;
14617f37794eSStanislav Mekhanoshin 
14627f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINH:
14637f37794eSStanislav Mekhanoshin     // asinh(x) == log(x + sqrt(x*x + 1))
14647f37794eSStanislav Mekhanoshin     Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
14657f37794eSStanislav Mekhanoshin     return true;
14667f37794eSStanislav Mekhanoshin 
14677f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINPI:
14687f37794eSStanislav Mekhanoshin     Res0 = asin(opr0) / MATH_PI;
14697f37794eSStanislav Mekhanoshin     return true;
14707f37794eSStanislav Mekhanoshin 
14717f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATAN:
14727f37794eSStanislav Mekhanoshin     Res0 = atan(opr0);
14737f37794eSStanislav Mekhanoshin     return true;
14747f37794eSStanislav Mekhanoshin 
14757f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANH:
14767f37794eSStanislav Mekhanoshin     // atanh(x) == (log(x+1) - log(x-1))/2;
14777f37794eSStanislav Mekhanoshin     Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
14787f37794eSStanislav Mekhanoshin     return true;
14797f37794eSStanislav Mekhanoshin 
14807f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANPI:
14817f37794eSStanislav Mekhanoshin     Res0 = atan(opr0) / MATH_PI;
14827f37794eSStanislav Mekhanoshin     return true;
14837f37794eSStanislav Mekhanoshin 
14847f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_CBRT:
14857f37794eSStanislav Mekhanoshin     Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
14867f37794eSStanislav Mekhanoshin     return true;
14877f37794eSStanislav Mekhanoshin 
14887f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
14897f37794eSStanislav Mekhanoshin     Res0 = cos(opr0);
14907f37794eSStanislav Mekhanoshin     return true;
14917f37794eSStanislav Mekhanoshin 
14927f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSH:
14937f37794eSStanislav Mekhanoshin     Res0 = cosh(opr0);
14947f37794eSStanislav Mekhanoshin     return true;
14957f37794eSStanislav Mekhanoshin 
14967f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSPI:
14977f37794eSStanislav Mekhanoshin     Res0 = cos(MATH_PI * opr0);
14987f37794eSStanislav Mekhanoshin     return true;
14997f37794eSStanislav Mekhanoshin 
15007f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:
15017f37794eSStanislav Mekhanoshin     Res0 = exp(opr0);
15027f37794eSStanislav Mekhanoshin     return true;
15037f37794eSStanislav Mekhanoshin 
15047f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:
15057f37794eSStanislav Mekhanoshin     Res0 = pow(2.0, opr0);
15067f37794eSStanislav Mekhanoshin     return true;
15077f37794eSStanislav Mekhanoshin 
15087f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:
15097f37794eSStanislav Mekhanoshin     Res0 = pow(10.0, opr0);
15107f37794eSStanislav Mekhanoshin     return true;
15117f37794eSStanislav Mekhanoshin 
15127f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXPM1:
15137f37794eSStanislav Mekhanoshin     Res0 = exp(opr0) - 1.0;
15147f37794eSStanislav Mekhanoshin     return true;
15157f37794eSStanislav Mekhanoshin 
15167f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:
15177f37794eSStanislav Mekhanoshin     Res0 = log(opr0);
15187f37794eSStanislav Mekhanoshin     return true;
15197f37794eSStanislav Mekhanoshin 
15207f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:
15217f37794eSStanislav Mekhanoshin     Res0 = log(opr0) / log(2.0);
15227f37794eSStanislav Mekhanoshin     return true;
15237f37794eSStanislav Mekhanoshin 
15247f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:
15257f37794eSStanislav Mekhanoshin     Res0 = log(opr0) / log(10.0);
15267f37794eSStanislav Mekhanoshin     return true;
15277f37794eSStanislav Mekhanoshin 
15287f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:
15297f37794eSStanislav Mekhanoshin     Res0 = 1.0 / sqrt(opr0);
15307f37794eSStanislav Mekhanoshin     return true;
15317f37794eSStanislav Mekhanoshin 
15327f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
15337f37794eSStanislav Mekhanoshin     Res0 = sin(opr0);
15347f37794eSStanislav Mekhanoshin     return true;
15357f37794eSStanislav Mekhanoshin 
15367f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINH:
15377f37794eSStanislav Mekhanoshin     Res0 = sinh(opr0);
15387f37794eSStanislav Mekhanoshin     return true;
15397f37794eSStanislav Mekhanoshin 
15407f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINPI:
15417f37794eSStanislav Mekhanoshin     Res0 = sin(MATH_PI * opr0);
15427f37794eSStanislav Mekhanoshin     return true;
15437f37794eSStanislav Mekhanoshin 
15447f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
15457f37794eSStanislav Mekhanoshin     Res0 = sqrt(opr0);
15467f37794eSStanislav Mekhanoshin     return true;
15477f37794eSStanislav Mekhanoshin 
15487f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:
15497f37794eSStanislav Mekhanoshin     Res0 = tan(opr0);
15507f37794eSStanislav Mekhanoshin     return true;
15517f37794eSStanislav Mekhanoshin 
15527f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANH:
15537f37794eSStanislav Mekhanoshin     Res0 = tanh(opr0);
15547f37794eSStanislav Mekhanoshin     return true;
15557f37794eSStanislav Mekhanoshin 
15567f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANPI:
15577f37794eSStanislav Mekhanoshin     Res0 = tan(MATH_PI * opr0);
15587f37794eSStanislav Mekhanoshin     return true;
15597f37794eSStanislav Mekhanoshin 
15607f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
15617f37794eSStanislav Mekhanoshin     Res0 = 1.0 / opr0;
15627f37794eSStanislav Mekhanoshin     return true;
15637f37794eSStanislav Mekhanoshin 
15647f37794eSStanislav Mekhanoshin   // two-arg functions
15657f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
15667f37794eSStanislav Mekhanoshin     Res0 = opr0 / opr1;
15677f37794eSStanislav Mekhanoshin     return true;
15687f37794eSStanislav Mekhanoshin 
15697f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POW:
15707f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
15717f37794eSStanislav Mekhanoshin     Res0 = pow(opr0, opr1);
15727f37794eSStanislav Mekhanoshin     return true;
15737f37794eSStanislav Mekhanoshin 
15747f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWN: {
15757f37794eSStanislav Mekhanoshin     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15767f37794eSStanislav Mekhanoshin       double val = (double)iopr1->getSExtValue();
15777f37794eSStanislav Mekhanoshin       Res0 = pow(opr0, val);
15787f37794eSStanislav Mekhanoshin       return true;
15797f37794eSStanislav Mekhanoshin     }
15807f37794eSStanislav Mekhanoshin     return false;
15817f37794eSStanislav Mekhanoshin   }
15827f37794eSStanislav Mekhanoshin 
15837f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ROOTN: {
15847f37794eSStanislav Mekhanoshin     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15857f37794eSStanislav Mekhanoshin       double val = (double)iopr1->getSExtValue();
15867f37794eSStanislav Mekhanoshin       Res0 = pow(opr0, 1.0 / val);
15877f37794eSStanislav Mekhanoshin       return true;
15887f37794eSStanislav Mekhanoshin     }
15897f37794eSStanislav Mekhanoshin     return false;
15907f37794eSStanislav Mekhanoshin   }
15917f37794eSStanislav Mekhanoshin 
15927f37794eSStanislav Mekhanoshin   // with ptr arg
15937f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINCOS:
15947f37794eSStanislav Mekhanoshin     Res0 = sin(opr0);
15957f37794eSStanislav Mekhanoshin     Res1 = cos(opr0);
15967f37794eSStanislav Mekhanoshin     return true;
15977f37794eSStanislav Mekhanoshin 
15987f37794eSStanislav Mekhanoshin   // three-arg functions
15997f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_FMA:
16007f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_MAD:
16017f37794eSStanislav Mekhanoshin     Res0 = opr0 * opr1 + opr2;
16027f37794eSStanislav Mekhanoshin     return true;
16037f37794eSStanislav Mekhanoshin   }
16047f37794eSStanislav Mekhanoshin 
16057f37794eSStanislav Mekhanoshin   return false;
16067f37794eSStanislav Mekhanoshin }
16077f37794eSStanislav Mekhanoshin 
16087f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
16097f37794eSStanislav Mekhanoshin   int numArgs = (int)aCI->getNumArgOperands();
16107f37794eSStanislav Mekhanoshin   if (numArgs > 3)
16117f37794eSStanislav Mekhanoshin     return false;
16127f37794eSStanislav Mekhanoshin 
16137f37794eSStanislav Mekhanoshin   Constant *copr0 = nullptr;
16147f37794eSStanislav Mekhanoshin   Constant *copr1 = nullptr;
16157f37794eSStanislav Mekhanoshin   Constant *copr2 = nullptr;
16167f37794eSStanislav Mekhanoshin   if (numArgs > 0) {
16177f37794eSStanislav Mekhanoshin     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
16187f37794eSStanislav Mekhanoshin       return false;
16197f37794eSStanislav Mekhanoshin   }
16207f37794eSStanislav Mekhanoshin 
16217f37794eSStanislav Mekhanoshin   if (numArgs > 1) {
16227f37794eSStanislav Mekhanoshin     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
16237f37794eSStanislav Mekhanoshin       if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
16247f37794eSStanislav Mekhanoshin         return false;
16257f37794eSStanislav Mekhanoshin     }
16267f37794eSStanislav Mekhanoshin   }
16277f37794eSStanislav Mekhanoshin 
16287f37794eSStanislav Mekhanoshin   if (numArgs > 2) {
16297f37794eSStanislav Mekhanoshin     if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
16307f37794eSStanislav Mekhanoshin       return false;
16317f37794eSStanislav Mekhanoshin   }
16327f37794eSStanislav Mekhanoshin 
16337f37794eSStanislav Mekhanoshin   // At this point, all arguments to aCI are constants.
16347f37794eSStanislav Mekhanoshin 
16357f37794eSStanislav Mekhanoshin   // max vector size is 16, and sincos will generate two results.
16367f37794eSStanislav Mekhanoshin   double DVal0[16], DVal1[16];
16377f37794eSStanislav Mekhanoshin   bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
16387f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
16397f37794eSStanislav Mekhanoshin     if (!evaluateScalarMathFunc(FInfo, DVal0[0],
16407f37794eSStanislav Mekhanoshin                                 DVal1[0], copr0, copr1, copr2)) {
16417f37794eSStanislav Mekhanoshin       return false;
16427f37794eSStanislav Mekhanoshin     }
16437f37794eSStanislav Mekhanoshin   } else {
16447f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
16457f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
16467f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
16477f37794eSStanislav Mekhanoshin     for (int i=0; i < getVecSize(FInfo); ++i) {
16487f37794eSStanislav Mekhanoshin       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16497f37794eSStanislav Mekhanoshin       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16507f37794eSStanislav Mekhanoshin       Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
16517f37794eSStanislav Mekhanoshin       if (!evaluateScalarMathFunc(FInfo, DVal0[i],
16527f37794eSStanislav Mekhanoshin                                   DVal1[i], celt0, celt1, celt2)) {
16537f37794eSStanislav Mekhanoshin         return false;
16547f37794eSStanislav Mekhanoshin       }
16557f37794eSStanislav Mekhanoshin     }
16567f37794eSStanislav Mekhanoshin   }
16577f37794eSStanislav Mekhanoshin 
16587f37794eSStanislav Mekhanoshin   LLVMContext &context = CI->getParent()->getParent()->getContext();
16597f37794eSStanislav Mekhanoshin   Constant *nval0, *nval1;
16607f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
16617f37794eSStanislav Mekhanoshin     nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
16627f37794eSStanislav Mekhanoshin     if (hasTwoResults)
16637f37794eSStanislav Mekhanoshin       nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
16647f37794eSStanislav Mekhanoshin   } else {
16657f37794eSStanislav Mekhanoshin     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16667f37794eSStanislav Mekhanoshin       SmallVector <float, 0> FVal0, FVal1;
16677f37794eSStanislav Mekhanoshin       for (int i=0; i < getVecSize(FInfo); ++i)
16687f37794eSStanislav Mekhanoshin         FVal0.push_back((float)DVal0[i]);
16697f37794eSStanislav Mekhanoshin       ArrayRef<float> tmp0(FVal0);
16707f37794eSStanislav Mekhanoshin       nval0 = ConstantDataVector::get(context, tmp0);
16717f37794eSStanislav Mekhanoshin       if (hasTwoResults) {
16727f37794eSStanislav Mekhanoshin         for (int i=0; i < getVecSize(FInfo); ++i)
16737f37794eSStanislav Mekhanoshin           FVal1.push_back((float)DVal1[i]);
16747f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp1(FVal1);
16757f37794eSStanislav Mekhanoshin         nval1 = ConstantDataVector::get(context, tmp1);
16767f37794eSStanislav Mekhanoshin       }
16777f37794eSStanislav Mekhanoshin     } else {
16787f37794eSStanislav Mekhanoshin       ArrayRef<double> tmp0(DVal0);
16797f37794eSStanislav Mekhanoshin       nval0 = ConstantDataVector::get(context, tmp0);
16807f37794eSStanislav Mekhanoshin       if (hasTwoResults) {
16817f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp1(DVal1);
16827f37794eSStanislav Mekhanoshin         nval1 = ConstantDataVector::get(context, tmp1);
16837f37794eSStanislav Mekhanoshin       }
16847f37794eSStanislav Mekhanoshin     }
16857f37794eSStanislav Mekhanoshin   }
16867f37794eSStanislav Mekhanoshin 
16877f37794eSStanislav Mekhanoshin   if (hasTwoResults) {
16887f37794eSStanislav Mekhanoshin     // sincos
16897f37794eSStanislav Mekhanoshin     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16907f37794eSStanislav Mekhanoshin            "math function with ptr arg not supported yet");
16917f37794eSStanislav Mekhanoshin     new StoreInst(nval1, aCI->getArgOperand(1), aCI);
16927f37794eSStanislav Mekhanoshin   }
16937f37794eSStanislav Mekhanoshin 
16947f37794eSStanislav Mekhanoshin   replaceCall(nval0);
16957f37794eSStanislav Mekhanoshin   return true;
16967f37794eSStanislav Mekhanoshin }
16977f37794eSStanislav Mekhanoshin 
16987f37794eSStanislav Mekhanoshin // Public interface to the Simplify LibCalls pass.
1699348735b7SMatt Arsenault FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1700348735b7SMatt Arsenault   return new AMDGPUSimplifyLibCalls(TM);
17017f37794eSStanislav Mekhanoshin }
17027f37794eSStanislav Mekhanoshin 
17037f37794eSStanislav Mekhanoshin FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
17047f37794eSStanislav Mekhanoshin   return new AMDGPUUseNativeCalls();
17057f37794eSStanislav Mekhanoshin }
17067f37794eSStanislav Mekhanoshin 
17077f37794eSStanislav Mekhanoshin bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
17087f37794eSStanislav Mekhanoshin   if (skipFunction(F))
17097f37794eSStanislav Mekhanoshin     return false;
17107f37794eSStanislav Mekhanoshin 
17117f37794eSStanislav Mekhanoshin   bool Changed = false;
17127f37794eSStanislav Mekhanoshin   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
17137f37794eSStanislav Mekhanoshin 
1714d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1715d34e60caSNicola Zaghen              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
17167f37794eSStanislav Mekhanoshin 
17177f37794eSStanislav Mekhanoshin   for (auto &BB : F) {
17187f37794eSStanislav Mekhanoshin     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17197f37794eSStanislav Mekhanoshin       // Ignore non-calls.
17207f37794eSStanislav Mekhanoshin       CallInst *CI = dyn_cast<CallInst>(I);
17217f37794eSStanislav Mekhanoshin       ++I;
17226a31a9a5Sdfukalov       // Ignore intrinsics that do not become real instructions.
17236a31a9a5Sdfukalov       if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
17246a31a9a5Sdfukalov         continue;
17257f37794eSStanislav Mekhanoshin 
17267f37794eSStanislav Mekhanoshin       // Ignore indirect calls.
17277f37794eSStanislav Mekhanoshin       Function *Callee = CI->getCalledFunction();
17287f37794eSStanislav Mekhanoshin       if (Callee == 0) continue;
17297f37794eSStanislav Mekhanoshin 
1730d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
17317f37794eSStanislav Mekhanoshin                  dbgs().flush());
17327f37794eSStanislav Mekhanoshin       if(Simplifier.fold(CI, AA))
17337f37794eSStanislav Mekhanoshin         Changed = true;
17347f37794eSStanislav Mekhanoshin     }
17357f37794eSStanislav Mekhanoshin   }
17367f37794eSStanislav Mekhanoshin   return Changed;
17377f37794eSStanislav Mekhanoshin }
17387f37794eSStanislav Mekhanoshin 
17399abc4577SArthur Eubanks PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
17409abc4577SArthur Eubanks                                                   FunctionAnalysisManager &AM) {
17418e293fe6SArthur Eubanks   AMDGPULibCalls Simplifier(&TM);
17429abc4577SArthur Eubanks   Simplifier.initNativeFuncs();
17439abc4577SArthur Eubanks 
17449abc4577SArthur Eubanks   bool Changed = false;
17459abc4577SArthur Eubanks   auto AA = &AM.getResult<AAManager>(F);
17469abc4577SArthur Eubanks 
17479abc4577SArthur Eubanks   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
17489abc4577SArthur Eubanks              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
17499abc4577SArthur Eubanks 
17509abc4577SArthur Eubanks   for (auto &BB : F) {
17519abc4577SArthur Eubanks     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
17529abc4577SArthur Eubanks       // Ignore non-calls.
17539abc4577SArthur Eubanks       CallInst *CI = dyn_cast<CallInst>(I);
17549abc4577SArthur Eubanks       ++I;
17559abc4577SArthur Eubanks       // Ignore intrinsics that do not become real instructions.
17569abc4577SArthur Eubanks       if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
17579abc4577SArthur Eubanks         continue;
17589abc4577SArthur Eubanks 
17599abc4577SArthur Eubanks       // Ignore indirect calls.
17609abc4577SArthur Eubanks       Function *Callee = CI->getCalledFunction();
17619abc4577SArthur Eubanks       if (Callee == 0)
17629abc4577SArthur Eubanks         continue;
17639abc4577SArthur Eubanks 
17649abc4577SArthur Eubanks       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
17659abc4577SArthur Eubanks                  dbgs().flush());
17669abc4577SArthur Eubanks       if (Simplifier.fold(CI, AA))
17679abc4577SArthur Eubanks         Changed = true;
17689abc4577SArthur Eubanks     }
17699abc4577SArthur Eubanks   }
17709abc4577SArthur Eubanks   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
17719abc4577SArthur Eubanks }
17729abc4577SArthur Eubanks 
17737f37794eSStanislav Mekhanoshin bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
17747f37794eSStanislav Mekhanoshin   if (skipFunction(F) || UseNative.empty())
17757f37794eSStanislav Mekhanoshin     return false;
17767f37794eSStanislav Mekhanoshin 
17777f37794eSStanislav Mekhanoshin   bool Changed = false;
17787f37794eSStanislav Mekhanoshin   for (auto &BB : F) {
17797f37794eSStanislav Mekhanoshin     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17807f37794eSStanislav Mekhanoshin       // Ignore non-calls.
17817f37794eSStanislav Mekhanoshin       CallInst *CI = dyn_cast<CallInst>(I);
17827f37794eSStanislav Mekhanoshin       ++I;
17837f37794eSStanislav Mekhanoshin       if (!CI) continue;
17847f37794eSStanislav Mekhanoshin 
17857f37794eSStanislav Mekhanoshin       // Ignore indirect calls.
17867f37794eSStanislav Mekhanoshin       Function *Callee = CI->getCalledFunction();
17877f37794eSStanislav Mekhanoshin       if (Callee == 0) continue;
17887f37794eSStanislav Mekhanoshin 
17897f37794eSStanislav Mekhanoshin       if(Simplifier.useNative(CI))
17907f37794eSStanislav Mekhanoshin         Changed = true;
17917f37794eSStanislav Mekhanoshin     }
17927f37794eSStanislav Mekhanoshin   }
17937f37794eSStanislav Mekhanoshin   return Changed;
17947f37794eSStanislav Mekhanoshin }
17959abc4577SArthur Eubanks 
17969abc4577SArthur Eubanks PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
17979abc4577SArthur Eubanks                                                 FunctionAnalysisManager &AM) {
17989abc4577SArthur Eubanks   if (UseNative.empty())
17999abc4577SArthur Eubanks     return PreservedAnalyses::all();
18009abc4577SArthur Eubanks 
18019abc4577SArthur Eubanks   AMDGPULibCalls Simplifier;
18029abc4577SArthur Eubanks   Simplifier.initNativeFuncs();
18039abc4577SArthur Eubanks 
18049abc4577SArthur Eubanks   bool Changed = false;
18059abc4577SArthur Eubanks   for (auto &BB : F) {
18069abc4577SArthur Eubanks     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
18079abc4577SArthur Eubanks       // Ignore non-calls.
18089abc4577SArthur Eubanks       CallInst *CI = dyn_cast<CallInst>(I);
18099abc4577SArthur Eubanks       ++I;
18109abc4577SArthur Eubanks       if (!CI)
18119abc4577SArthur Eubanks         continue;
18129abc4577SArthur Eubanks 
18139abc4577SArthur Eubanks       // Ignore indirect calls.
18149abc4577SArthur Eubanks       Function *Callee = CI->getCalledFunction();
18159abc4577SArthur Eubanks       if (Callee == 0)
18169abc4577SArthur Eubanks         continue;
18179abc4577SArthur Eubanks 
18189abc4577SArthur Eubanks       if (Simplifier.useNative(CI))
18199abc4577SArthur Eubanks         Changed = true;
18209abc4577SArthur Eubanks     }
18219abc4577SArthur Eubanks   }
18229abc4577SArthur Eubanks   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
18239abc4577SArthur Eubanks }
1824