17f37794eSStanislav Mekhanoshin //===- AMDGPULibCalls.cpp -------------------------------------------------===//
27f37794eSStanislav Mekhanoshin //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67f37794eSStanislav Mekhanoshin //
77f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
87f37794eSStanislav Mekhanoshin //
97f37794eSStanislav Mekhanoshin /// \file
105f8f34e4SAdrian Prantl /// This file does AMD library function optimizations.
117f37794eSStanislav Mekhanoshin //
127f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
137f37794eSStanislav Mekhanoshin 
147f37794eSStanislav Mekhanoshin #include "AMDGPU.h"
157f37794eSStanislav Mekhanoshin #include "AMDGPULibFunc.h"
16560d7e04Sdfukalov #include "GCNSubtarget.h"
177f37794eSStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h"
187f37794eSStanislav Mekhanoshin #include "llvm/Analysis/Loads.h"
1999142003SNikita Popov #include "llvm/IR/IRBuilder.h"
20e188aae4Sserge-sans-paille #include "llvm/IR/IntrinsicInst.h"
21e188aae4Sserge-sans-paille #include "llvm/IR/IntrinsicsAMDGPU.h"
2205da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
23a9191c84SStanislav Mekhanoshin #include "llvm/Target/TargetMachine.h"
2405da2fe5SReid Kleckner 
2505da2fe5SReid Kleckner #define DEBUG_TYPE "amdgpu-simplifylib"
267f37794eSStanislav Mekhanoshin 
277f37794eSStanislav Mekhanoshin using namespace llvm;
287f37794eSStanislav Mekhanoshin 
297f37794eSStanislav Mekhanoshin static cl::opt<bool> EnablePreLink("amdgpu-prelink",
307f37794eSStanislav Mekhanoshin   cl::desc("Enable pre-link mode optimizations"),
317f37794eSStanislav Mekhanoshin   cl::init(false),
327f37794eSStanislav Mekhanoshin   cl::Hidden);
337f37794eSStanislav Mekhanoshin 
347f37794eSStanislav Mekhanoshin static cl::list<std::string> UseNative("amdgpu-use-native",
357f37794eSStanislav Mekhanoshin   cl::desc("Comma separated list of functions to replace with native, or all"),
367f37794eSStanislav Mekhanoshin   cl::CommaSeparated, cl::ValueOptional,
377f37794eSStanislav Mekhanoshin   cl::Hidden);
387f37794eSStanislav Mekhanoshin 
39c57a9dc4SEvandro Menezes #define MATH_PI      numbers::pi
40c57a9dc4SEvandro Menezes #define MATH_E       numbers::e
41c57a9dc4SEvandro Menezes #define MATH_SQRT2   numbers::sqrt2
42c57a9dc4SEvandro Menezes #define MATH_SQRT1_2 numbers::inv_sqrt2
437f37794eSStanislav Mekhanoshin 
447f37794eSStanislav Mekhanoshin namespace llvm {
457f37794eSStanislav Mekhanoshin 
467f37794eSStanislav Mekhanoshin class AMDGPULibCalls {
477f37794eSStanislav Mekhanoshin private:
487f37794eSStanislav Mekhanoshin 
497f37794eSStanislav Mekhanoshin   typedef llvm::AMDGPULibFunc FuncInfo;
507f37794eSStanislav Mekhanoshin 
51a9191c84SStanislav Mekhanoshin   const TargetMachine *TM;
52a9191c84SStanislav Mekhanoshin 
537f37794eSStanislav Mekhanoshin   // -fuse-native.
547f37794eSStanislav Mekhanoshin   bool AllNative = false;
557f37794eSStanislav Mekhanoshin 
567f37794eSStanislav Mekhanoshin   bool useNativeFunc(const StringRef F) const;
577f37794eSStanislav Mekhanoshin 
58dc6e8dfdSJacob Lambert   // Return a pointer (pointer expr) to the function if function definition with
597f37794eSStanislav Mekhanoshin   // "FuncName" exists. It may create a new function prototype in pre-link mode.
6013680223SJames Y Knight   FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
617f37794eSStanislav Mekhanoshin 
6299a64cc9SSimon Pilgrim   bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
637f37794eSStanislav Mekhanoshin 
647f37794eSStanislav Mekhanoshin   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
657f37794eSStanislav Mekhanoshin 
667f37794eSStanislav Mekhanoshin   /* Specialized optimizations */
677f37794eSStanislav Mekhanoshin 
687f37794eSStanislav Mekhanoshin   // recip (half or native)
697f37794eSStanislav Mekhanoshin   bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
707f37794eSStanislav Mekhanoshin 
717f37794eSStanislav Mekhanoshin   // divide (half or native)
727f37794eSStanislav Mekhanoshin   bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
737f37794eSStanislav Mekhanoshin 
747f37794eSStanislav Mekhanoshin   // pow/powr/pown
757f37794eSStanislav Mekhanoshin   bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
767f37794eSStanislav Mekhanoshin 
777f37794eSStanislav Mekhanoshin   // rootn
787f37794eSStanislav Mekhanoshin   bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
797f37794eSStanislav Mekhanoshin 
807f37794eSStanislav Mekhanoshin   // fma/mad
817f37794eSStanislav Mekhanoshin   bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
827f37794eSStanislav Mekhanoshin 
837f37794eSStanislav Mekhanoshin   // -fuse-native for sincos
847f37794eSStanislav Mekhanoshin   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
857f37794eSStanislav Mekhanoshin 
867f37794eSStanislav Mekhanoshin   // evaluate calls if calls' arguments are constants.
87a750332dSSimon Pilgrim   bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
887f37794eSStanislav Mekhanoshin     double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
89a750332dSSimon Pilgrim   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
907f37794eSStanislav Mekhanoshin 
917f37794eSStanislav Mekhanoshin   // sqrt
927f37794eSStanislav Mekhanoshin   bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
937f37794eSStanislav Mekhanoshin 
947f37794eSStanislav Mekhanoshin   // sin/cos
957f37794eSStanislav Mekhanoshin   bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
967f37794eSStanislav Mekhanoshin 
97fc5121a7SYaxun Liu   // __read_pipe/__write_pipe
98a750332dSSimon Pilgrim   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
99a750332dSSimon Pilgrim                             const FuncInfo &FInfo);
100fc5121a7SYaxun Liu 
101a9191c84SStanislav Mekhanoshin   // llvm.amdgcn.wavefrontsize
102a9191c84SStanislav Mekhanoshin   bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
103a9191c84SStanislav Mekhanoshin 
1047f37794eSStanislav Mekhanoshin   // Get insertion point at entry.
1057f37794eSStanislav Mekhanoshin   BasicBlock::iterator getEntryIns(CallInst * UI);
1067f37794eSStanislav Mekhanoshin   // Insert an Alloc instruction.
1077f37794eSStanislav Mekhanoshin   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
108d1f45ed5SNeubauer, Sebastian   // Get a scalar native builtin single argument FP function
10913680223SJames Y Knight   FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
1107f37794eSStanislav Mekhanoshin 
1117f37794eSStanislav Mekhanoshin protected:
1127f37794eSStanislav Mekhanoshin   CallInst *CI;
1137f37794eSStanislav Mekhanoshin 
1147f37794eSStanislav Mekhanoshin   bool isUnsafeMath(const CallInst *CI) const;
1157f37794eSStanislav Mekhanoshin 
replaceCall(Value * With)1167f37794eSStanislav Mekhanoshin   void replaceCall(Value *With) {
1177f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(With);
1187f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
1197f37794eSStanislav Mekhanoshin   }
1207f37794eSStanislav Mekhanoshin 
1217f37794eSStanislav Mekhanoshin public:
AMDGPULibCalls(const TargetMachine * TM_=nullptr)122a9191c84SStanislav Mekhanoshin   AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
123a9191c84SStanislav Mekhanoshin 
1247f37794eSStanislav Mekhanoshin   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
1257f37794eSStanislav Mekhanoshin 
1267f37794eSStanislav Mekhanoshin   void initNativeFuncs();
1277f37794eSStanislav Mekhanoshin 
1287f37794eSStanislav Mekhanoshin   // Replace a normal math function call with that native version
1297f37794eSStanislav Mekhanoshin   bool useNative(CallInst *CI);
1307f37794eSStanislav Mekhanoshin };
1317f37794eSStanislav Mekhanoshin 
1327f37794eSStanislav Mekhanoshin } // end llvm namespace
1337f37794eSStanislav Mekhanoshin 
1347f37794eSStanislav Mekhanoshin namespace {
1357f37794eSStanislav Mekhanoshin 
1367f37794eSStanislav Mekhanoshin   class AMDGPUSimplifyLibCalls : public FunctionPass {
1377f37794eSStanislav Mekhanoshin 
138a9191c84SStanislav Mekhanoshin   AMDGPULibCalls Simplifier;
139a9191c84SStanislav Mekhanoshin 
1407f37794eSStanislav Mekhanoshin   public:
1417f37794eSStanislav Mekhanoshin     static char ID; // Pass identification
1427f37794eSStanislav Mekhanoshin 
AMDGPUSimplifyLibCalls(const TargetMachine * TM=nullptr)143348735b7SMatt Arsenault     AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
144348735b7SMatt Arsenault       : FunctionPass(ID), Simplifier(TM) {
1457f37794eSStanislav Mekhanoshin       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
1467f37794eSStanislav Mekhanoshin     }
1477f37794eSStanislav Mekhanoshin 
getAnalysisUsage(AnalysisUsage & AU) const1487f37794eSStanislav Mekhanoshin     void getAnalysisUsage(AnalysisUsage &AU) const override {
1497f37794eSStanislav Mekhanoshin       AU.addRequired<AAResultsWrapperPass>();
1507f37794eSStanislav Mekhanoshin     }
1517f37794eSStanislav Mekhanoshin 
1527f37794eSStanislav Mekhanoshin     bool runOnFunction(Function &M) override;
1537f37794eSStanislav Mekhanoshin   };
1547f37794eSStanislav Mekhanoshin 
1557f37794eSStanislav Mekhanoshin   class AMDGPUUseNativeCalls : public FunctionPass {
1567f37794eSStanislav Mekhanoshin 
1577f37794eSStanislav Mekhanoshin   AMDGPULibCalls Simplifier;
1587f37794eSStanislav Mekhanoshin 
1597f37794eSStanislav Mekhanoshin   public:
1607f37794eSStanislav Mekhanoshin     static char ID; // Pass identification
1617f37794eSStanislav Mekhanoshin 
AMDGPUUseNativeCalls()1627f37794eSStanislav Mekhanoshin     AMDGPUUseNativeCalls() : FunctionPass(ID) {
1637f37794eSStanislav Mekhanoshin       initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
1647f37794eSStanislav Mekhanoshin       Simplifier.initNativeFuncs();
1657f37794eSStanislav Mekhanoshin     }
1667f37794eSStanislav Mekhanoshin 
1677f37794eSStanislav Mekhanoshin     bool runOnFunction(Function &F) override;
1687f37794eSStanislav Mekhanoshin   };
1697f37794eSStanislav Mekhanoshin 
1707f37794eSStanislav Mekhanoshin } // end anonymous namespace.
1717f37794eSStanislav Mekhanoshin 
1727f37794eSStanislav Mekhanoshin char AMDGPUSimplifyLibCalls::ID = 0;
1737f37794eSStanislav Mekhanoshin char AMDGPUUseNativeCalls::ID = 0;
1747f37794eSStanislav Mekhanoshin 
1757f37794eSStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
1767f37794eSStanislav Mekhanoshin                       "Simplify well-known AMD library calls", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)1777f37794eSStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1787f37794eSStanislav Mekhanoshin INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
1797f37794eSStanislav Mekhanoshin                     "Simplify well-known AMD library calls", false, false)
1807f37794eSStanislav Mekhanoshin 
1817f37794eSStanislav Mekhanoshin INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
1827f37794eSStanislav Mekhanoshin                 "Replace builtin math calls with that native versions.",
1837f37794eSStanislav Mekhanoshin                 false, false)
1847f37794eSStanislav Mekhanoshin 
1857f37794eSStanislav Mekhanoshin template <typename IRB>
18613680223SJames Y Knight static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
18751ebcaafSBenjamin Kramer                               const Twine &Name = "") {
1887f37794eSStanislav Mekhanoshin   CallInst *R = B.CreateCall(Callee, Arg, Name);
18913680223SJames Y Knight   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1907f37794eSStanislav Mekhanoshin     R->setCallingConv(F->getCallingConv());
1917f37794eSStanislav Mekhanoshin   return R;
1927f37794eSStanislav Mekhanoshin }
1937f37794eSStanislav Mekhanoshin 
1947f37794eSStanislav Mekhanoshin template <typename IRB>
CreateCallEx2(IRB & B,FunctionCallee Callee,Value * Arg1,Value * Arg2,const Twine & Name="")19513680223SJames Y Knight static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
19613680223SJames Y Knight                                Value *Arg2, const Twine &Name = "") {
1977f37794eSStanislav Mekhanoshin   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
19813680223SJames Y Knight   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1997f37794eSStanislav Mekhanoshin     R->setCallingConv(F->getCallingConv());
2007f37794eSStanislav Mekhanoshin   return R;
2017f37794eSStanislav Mekhanoshin }
2027f37794eSStanislav Mekhanoshin 
2037f37794eSStanislav Mekhanoshin //  Data structures for table-driven optimizations.
2047f37794eSStanislav Mekhanoshin //  FuncTbl works for both f32 and f64 functions with 1 input argument
2057f37794eSStanislav Mekhanoshin 
/// One row of a folding table: a function value paired with the argument
/// that produces it. Rows are written as {result, input}.
struct TableEntry {
  double result; // value of the function at `input`
  double input;  // argument the row applies to
};
2107f37794eSStanislav Mekhanoshin 
2117f37794eSStanislav Mekhanoshin /* a list of {result, input} */
2127f37794eSStanislav Mekhanoshin static const TableEntry tbl_acos[] = {
2137f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, 0.0},
2147f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, -0.0},
2157f37794eSStanislav Mekhanoshin   {0.0, 1.0},
2167f37794eSStanislav Mekhanoshin   {MATH_PI, -1.0}
2177f37794eSStanislav Mekhanoshin };
2187f37794eSStanislav Mekhanoshin static const TableEntry tbl_acosh[] = {
2197f37794eSStanislav Mekhanoshin   {0.0, 1.0}
2207f37794eSStanislav Mekhanoshin };
2217f37794eSStanislav Mekhanoshin static const TableEntry tbl_acospi[] = {
2227f37794eSStanislav Mekhanoshin   {0.5, 0.0},
2237f37794eSStanislav Mekhanoshin   {0.5, -0.0},
2247f37794eSStanislav Mekhanoshin   {0.0, 1.0},
2257f37794eSStanislav Mekhanoshin   {1.0, -1.0}
2267f37794eSStanislav Mekhanoshin };
2277f37794eSStanislav Mekhanoshin static const TableEntry tbl_asin[] = {
2287f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2297f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2307f37794eSStanislav Mekhanoshin   {MATH_PI / 2.0, 1.0},
2317f37794eSStanislav Mekhanoshin   {-MATH_PI / 2.0, -1.0}
2327f37794eSStanislav Mekhanoshin };
2337f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinh[] = {
2347f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2357f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
2367f37794eSStanislav Mekhanoshin };
2377f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinpi[] = {
2387f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2397f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2407f37794eSStanislav Mekhanoshin   {0.5, 1.0},
2417f37794eSStanislav Mekhanoshin   {-0.5, -1.0}
2427f37794eSStanislav Mekhanoshin };
2437f37794eSStanislav Mekhanoshin static const TableEntry tbl_atan[] = {
2447f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2457f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2467f37794eSStanislav Mekhanoshin   {MATH_PI / 4.0, 1.0},
2477f37794eSStanislav Mekhanoshin   {-MATH_PI / 4.0, -1.0}
2487f37794eSStanislav Mekhanoshin };
2497f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanh[] = {
2507f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2517f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
2527f37794eSStanislav Mekhanoshin };
2537f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanpi[] = {
2547f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2557f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2567f37794eSStanislav Mekhanoshin   {0.25, 1.0},
2577f37794eSStanislav Mekhanoshin   {-0.25, -1.0}
2587f37794eSStanislav Mekhanoshin };
2597f37794eSStanislav Mekhanoshin static const TableEntry tbl_cbrt[] = {
2607f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2617f37794eSStanislav Mekhanoshin   {-0.0, -0.0},
2627f37794eSStanislav Mekhanoshin   {1.0, 1.0},
2637f37794eSStanislav Mekhanoshin   {-1.0, -1.0},
2647f37794eSStanislav Mekhanoshin };
2657f37794eSStanislav Mekhanoshin static const TableEntry tbl_cos[] = {
2667f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2677f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2687f37794eSStanislav Mekhanoshin };
2697f37794eSStanislav Mekhanoshin static const TableEntry tbl_cosh[] = {
2707f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2717f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2727f37794eSStanislav Mekhanoshin };
2737f37794eSStanislav Mekhanoshin static const TableEntry tbl_cospi[] = {
2747f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2757f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2767f37794eSStanislav Mekhanoshin };
2777f37794eSStanislav Mekhanoshin static const TableEntry tbl_erfc[] = {
2787f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2797f37794eSStanislav Mekhanoshin   {1.0, -0.0}
2807f37794eSStanislav Mekhanoshin };
2817f37794eSStanislav Mekhanoshin static const TableEntry tbl_erf[] = {
2827f37794eSStanislav Mekhanoshin   {0.0, 0.0},
2837f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
2847f37794eSStanislav Mekhanoshin };
2857f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp[] = {
2867f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2877f37794eSStanislav Mekhanoshin   {1.0, -0.0},
2887f37794eSStanislav Mekhanoshin   {MATH_E, 1.0}
2897f37794eSStanislav Mekhanoshin };
2907f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp2[] = {
2917f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2927f37794eSStanislav Mekhanoshin   {1.0, -0.0},
2937f37794eSStanislav Mekhanoshin   {2.0, 1.0}
2947f37794eSStanislav Mekhanoshin };
2957f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp10[] = {
2967f37794eSStanislav Mekhanoshin   {1.0, 0.0},
2977f37794eSStanislav Mekhanoshin   {1.0, -0.0},
2987f37794eSStanislav Mekhanoshin   {10.0, 1.0}
2997f37794eSStanislav Mekhanoshin };
3007f37794eSStanislav Mekhanoshin static const TableEntry tbl_expm1[] = {
3017f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3027f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3037f37794eSStanislav Mekhanoshin };
3047f37794eSStanislav Mekhanoshin static const TableEntry tbl_log[] = {
3057f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3067f37794eSStanislav Mekhanoshin   {1.0, MATH_E}
3077f37794eSStanislav Mekhanoshin };
3087f37794eSStanislav Mekhanoshin static const TableEntry tbl_log2[] = {
3097f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3107f37794eSStanislav Mekhanoshin   {1.0, 2.0}
3117f37794eSStanislav Mekhanoshin };
3127f37794eSStanislav Mekhanoshin static const TableEntry tbl_log10[] = {
3137f37794eSStanislav Mekhanoshin   {0.0, 1.0},
3147f37794eSStanislav Mekhanoshin   {1.0, 10.0}
3157f37794eSStanislav Mekhanoshin };
3167f37794eSStanislav Mekhanoshin static const TableEntry tbl_rsqrt[] = {
3177f37794eSStanislav Mekhanoshin   {1.0, 1.0},
318c57a9dc4SEvandro Menezes   {MATH_SQRT1_2, 2.0}
3197f37794eSStanislav Mekhanoshin };
3207f37794eSStanislav Mekhanoshin static const TableEntry tbl_sin[] = {
3217f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3227f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3237f37794eSStanislav Mekhanoshin };
3247f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinh[] = {
3257f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3267f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3277f37794eSStanislav Mekhanoshin };
3287f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinpi[] = {
3297f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3307f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3317f37794eSStanislav Mekhanoshin };
3327f37794eSStanislav Mekhanoshin static const TableEntry tbl_sqrt[] = {
3337f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3347f37794eSStanislav Mekhanoshin   {1.0, 1.0},
3357f37794eSStanislav Mekhanoshin   {MATH_SQRT2, 2.0}
3367f37794eSStanislav Mekhanoshin };
3377f37794eSStanislav Mekhanoshin static const TableEntry tbl_tan[] = {
3387f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3397f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3407f37794eSStanislav Mekhanoshin };
3417f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanh[] = {
3427f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3437f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3447f37794eSStanislav Mekhanoshin };
3457f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanpi[] = {
3467f37794eSStanislav Mekhanoshin   {0.0, 0.0},
3477f37794eSStanislav Mekhanoshin   {-0.0, -0.0}
3487f37794eSStanislav Mekhanoshin };
3497f37794eSStanislav Mekhanoshin static const TableEntry tbl_tgamma[] = {
3507f37794eSStanislav Mekhanoshin   {1.0, 1.0},
3517f37794eSStanislav Mekhanoshin   {1.0, 2.0},
3527f37794eSStanislav Mekhanoshin   {2.0, 3.0},
3537f37794eSStanislav Mekhanoshin   {6.0, 4.0}
3547f37794eSStanislav Mekhanoshin };
3557f37794eSStanislav Mekhanoshin 
HasNative(AMDGPULibFunc::EFuncId id)3567f37794eSStanislav Mekhanoshin static bool HasNative(AMDGPULibFunc::EFuncId id) {
3577f37794eSStanislav Mekhanoshin   switch(id) {
3587f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
3597f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
3607f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:
3617f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:
3627f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:
3637f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:
3647f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:
3657f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:
3667f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
3677f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
3687f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:
3697f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
3707f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINCOS:
3717f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
3727f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:
3737f37794eSStanislav Mekhanoshin     return true;
3747f37794eSStanislav Mekhanoshin   default:;
3757f37794eSStanislav Mekhanoshin   }
3767f37794eSStanislav Mekhanoshin   return false;
3777f37794eSStanislav Mekhanoshin }
3787f37794eSStanislav Mekhanoshin 
3796cbfffb3SKazu Hirata using TableRef = ArrayRef<TableEntry>;
3807f37794eSStanislav Mekhanoshin 
getOptTable(AMDGPULibFunc::EFuncId id)3817f37794eSStanislav Mekhanoshin static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
3827f37794eSStanislav Mekhanoshin   switch(id) {
3837f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
3847f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
3857f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
3867f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
3877f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
3887f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
3897f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
3907f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
3917f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
3927f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
3937f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NCOS:
3947f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
3957f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
3967f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
3977f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
3987f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
3997f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
4007f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NEXP2:
4017f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
4027f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
4037f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
4047f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
4057f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NLOG2:
4067f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
4077f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
4087f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NRSQRT:
4097f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
4107f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NSIN:
4117f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
4127f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
4137f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
4147f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NSQRT:
4157f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
4167f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
4177f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
4187f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
4197f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
4207f37794eSStanislav Mekhanoshin   default:;
4217f37794eSStanislav Mekhanoshin   }
4227f37794eSStanislav Mekhanoshin   return TableRef();
4237f37794eSStanislav Mekhanoshin }
4247f37794eSStanislav Mekhanoshin 
getVecSize(const AMDGPULibFunc & FInfo)4257f37794eSStanislav Mekhanoshin static inline int getVecSize(const AMDGPULibFunc& FInfo) {
426fc5121a7SYaxun Liu   return FInfo.getLeads()[0].VectorSize;
4277f37794eSStanislav Mekhanoshin }
4287f37794eSStanislav Mekhanoshin 
/// Return the ArgType recorded for the first leading parameter of \p FInfo,
/// converted to AMDGPULibFunc::EType.
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo) {
  const auto &FirstLead = FInfo.getLeads()[0];
  return static_cast<AMDGPULibFunc::EType>(FirstLead.ArgType);
}
4327f37794eSStanislav Mekhanoshin 
getFunction(Module * M,const FuncInfo & fInfo)43313680223SJames Y Knight FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
4347f37794eSStanislav Mekhanoshin   // If we are doing PreLinkOpt, the function is external. So it is safe to
4357f37794eSStanislav Mekhanoshin   // use getOrInsertFunction() at this stage.
4367f37794eSStanislav Mekhanoshin 
4377f37794eSStanislav Mekhanoshin   return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4387f37794eSStanislav Mekhanoshin                        : AMDGPULibFunc::getFunction(M, fInfo);
4397f37794eSStanislav Mekhanoshin }
4407f37794eSStanislav Mekhanoshin 
parseFunctionName(const StringRef & FMangledName,FuncInfo & FInfo)4417f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
44299a64cc9SSimon Pilgrim                                        FuncInfo &FInfo) {
44399a64cc9SSimon Pilgrim   return AMDGPULibFunc::parse(FMangledName, FInfo);
4447f37794eSStanislav Mekhanoshin }
4457f37794eSStanislav Mekhanoshin 
isUnsafeMath(const CallInst * CI) const4467f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
4477f37794eSStanislav Mekhanoshin   if (auto Op = dyn_cast<FPMathOperator>(CI))
448629c4115SSanjay Patel     if (Op->isFast())
4497f37794eSStanislav Mekhanoshin       return true;
4507f37794eSStanislav Mekhanoshin   const Function *F = CI->getParent()->getParent();
4517f37794eSStanislav Mekhanoshin   Attribute Attr = F->getFnAttribute("unsafe-fp-math");
452d6de1e1aSSerge Guelton   return Attr.getValueAsBool();
4537f37794eSStanislav Mekhanoshin }
4547f37794eSStanislav Mekhanoshin 
useNativeFunc(const StringRef F) const4557f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
456902cbcd5SKazu Hirata   return AllNative || llvm::is_contained(UseNative, F);
4577f37794eSStanislav Mekhanoshin }
4587f37794eSStanislav Mekhanoshin 
initNativeFuncs()4597f37794eSStanislav Mekhanoshin void AMDGPULibCalls::initNativeFuncs() {
4607f37794eSStanislav Mekhanoshin   AllNative = useNativeFunc("all") ||
4617f37794eSStanislav Mekhanoshin               (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
4627f37794eSStanislav Mekhanoshin                UseNative.begin()->empty());
4637f37794eSStanislav Mekhanoshin }
4647f37794eSStanislav Mekhanoshin 
sincosUseNative(CallInst * aCI,const FuncInfo & FInfo)4657f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
4667f37794eSStanislav Mekhanoshin   bool native_sin = useNativeFunc("sin");
4677f37794eSStanislav Mekhanoshin   bool native_cos = useNativeFunc("cos");
4687f37794eSStanislav Mekhanoshin 
4697f37794eSStanislav Mekhanoshin   if (native_sin && native_cos) {
4707f37794eSStanislav Mekhanoshin     Module *M = aCI->getModule();
4717f37794eSStanislav Mekhanoshin     Value *opr0 = aCI->getArgOperand(0);
4727f37794eSStanislav Mekhanoshin 
4737f37794eSStanislav Mekhanoshin     AMDGPULibFunc nf;
474fc5121a7SYaxun Liu     nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
475fc5121a7SYaxun Liu     nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
4767f37794eSStanislav Mekhanoshin 
4777f37794eSStanislav Mekhanoshin     nf.setPrefix(AMDGPULibFunc::NATIVE);
4787f37794eSStanislav Mekhanoshin     nf.setId(AMDGPULibFunc::EI_SIN);
47913680223SJames Y Knight     FunctionCallee sinExpr = getFunction(M, nf);
4807f37794eSStanislav Mekhanoshin 
4817f37794eSStanislav Mekhanoshin     nf.setPrefix(AMDGPULibFunc::NATIVE);
4827f37794eSStanislav Mekhanoshin     nf.setId(AMDGPULibFunc::EI_COS);
48313680223SJames Y Knight     FunctionCallee cosExpr = getFunction(M, nf);
4847f37794eSStanislav Mekhanoshin     if (sinExpr && cosExpr) {
4857f37794eSStanislav Mekhanoshin       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
4867f37794eSStanislav Mekhanoshin       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
4877f37794eSStanislav Mekhanoshin       new StoreInst(cosval, aCI->getArgOperand(1), aCI);
4887f37794eSStanislav Mekhanoshin 
4897f37794eSStanislav Mekhanoshin       DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
4907f37794eSStanislav Mekhanoshin                                           << " with native version of sin/cos");
4917f37794eSStanislav Mekhanoshin 
4927f37794eSStanislav Mekhanoshin       replaceCall(sinval);
4937f37794eSStanislav Mekhanoshin       return true;
4947f37794eSStanislav Mekhanoshin     }
4957f37794eSStanislav Mekhanoshin   }
4967f37794eSStanislav Mekhanoshin   return false;
4977f37794eSStanislav Mekhanoshin }
4987f37794eSStanislav Mekhanoshin 
useNative(CallInst * aCI)4997f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNative(CallInst *aCI) {
5007f37794eSStanislav Mekhanoshin   CI = aCI;
5017f37794eSStanislav Mekhanoshin   Function *Callee = aCI->getCalledFunction();
5027f37794eSStanislav Mekhanoshin 
5037f37794eSStanislav Mekhanoshin   FuncInfo FInfo;
50499a64cc9SSimon Pilgrim   if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
5057f37794eSStanislav Mekhanoshin       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
506fc5121a7SYaxun Liu       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
5077f37794eSStanislav Mekhanoshin       !(AllNative || useNativeFunc(FInfo.getName()))) {
5087f37794eSStanislav Mekhanoshin     return false;
5097f37794eSStanislav Mekhanoshin   }
5107f37794eSStanislav Mekhanoshin 
5117f37794eSStanislav Mekhanoshin   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5127f37794eSStanislav Mekhanoshin     return sincosUseNative(aCI, FInfo);
5137f37794eSStanislav Mekhanoshin 
5147f37794eSStanislav Mekhanoshin   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
51513680223SJames Y Knight   FunctionCallee F = getFunction(aCI->getModule(), FInfo);
5167f37794eSStanislav Mekhanoshin   if (!F)
5177f37794eSStanislav Mekhanoshin     return false;
5187f37794eSStanislav Mekhanoshin 
5197f37794eSStanislav Mekhanoshin   aCI->setCalledFunction(F);
5207f37794eSStanislav Mekhanoshin   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5217f37794eSStanislav Mekhanoshin                                       << " with native version");
5227f37794eSStanislav Mekhanoshin   return true;
5237f37794eSStanislav Mekhanoshin }
5247f37794eSStanislav Mekhanoshin 
525fc5121a7SYaxun Liu // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
526fc5121a7SYaxun Liu // builtin, with appended type size and alignment arguments, where 2 or 4
527fc5121a7SYaxun Liu // indicates the original number of arguments. The library has optimized version
528fc5121a7SYaxun Liu // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
529fc5121a7SYaxun Liu // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
530fc5121a7SYaxun Liu // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
531fc5121a7SYaxun Liu // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
fold_read_write_pipe(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)532fc5121a7SYaxun Liu bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
533a750332dSSimon Pilgrim                                           const FuncInfo &FInfo) {
534fc5121a7SYaxun Liu   auto *Callee = CI->getCalledFunction();
535fc5121a7SYaxun Liu   if (!Callee->isDeclaration())
536fc5121a7SYaxun Liu     return false;
537fc5121a7SYaxun Liu 
538fc5121a7SYaxun Liu   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
539fc5121a7SYaxun Liu   auto *M = Callee->getParent();
540fc5121a7SYaxun Liu   auto &Ctx = M->getContext();
541adcd0268SBenjamin Kramer   std::string Name = std::string(Callee->getName());
542c1e32b3fSKazu Hirata   auto NumArg = CI->arg_size();
543fc5121a7SYaxun Liu   if (NumArg != 4 && NumArg != 6)
544fc5121a7SYaxun Liu     return false;
545fc5121a7SYaxun Liu   auto *PacketSize = CI->getArgOperand(NumArg - 2);
546fc5121a7SYaxun Liu   auto *PacketAlign = CI->getArgOperand(NumArg - 1);
547fc5121a7SYaxun Liu   if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
548fc5121a7SYaxun Liu     return false;
549fc5121a7SYaxun Liu   unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
55087e2751cSGuillaume Chatelet   Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
55187e2751cSGuillaume Chatelet   if (Alignment != Size)
552fc5121a7SYaxun Liu     return false;
553fc5121a7SYaxun Liu 
554fc5121a7SYaxun Liu   Type *PtrElemTy;
555fc5121a7SYaxun Liu   if (Size <= 8)
556fc5121a7SYaxun Liu     PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
557fc5121a7SYaxun Liu   else
558aad93654SChristopher Tetreault     PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
559c1e32b3fSKazu Hirata   unsigned PtrArgLoc = CI->arg_size() - 3;
560fc5121a7SYaxun Liu   auto PtrArg = CI->getArgOperand(PtrArgLoc);
561fc5121a7SYaxun Liu   unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
562fc5121a7SYaxun Liu   auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
563fc5121a7SYaxun Liu 
564fc5121a7SYaxun Liu   SmallVector<llvm::Type *, 6> ArgTys;
565fc5121a7SYaxun Liu   for (unsigned I = 0; I != PtrArgLoc; ++I)
566fc5121a7SYaxun Liu     ArgTys.push_back(CI->getArgOperand(I)->getType());
567fc5121a7SYaxun Liu   ArgTys.push_back(PtrTy);
568fc5121a7SYaxun Liu 
569fc5121a7SYaxun Liu   Name = Name + "_" + std::to_string(Size);
570fc5121a7SYaxun Liu   auto *FTy = FunctionType::get(Callee->getReturnType(),
571fc5121a7SYaxun Liu                                 ArrayRef<Type *>(ArgTys), false);
572fc5121a7SYaxun Liu   AMDGPULibFunc NewLibFunc(Name, FTy);
57313680223SJames Y Knight   FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
574fc5121a7SYaxun Liu   if (!F)
575fc5121a7SYaxun Liu     return false;
576fc5121a7SYaxun Liu 
577fc5121a7SYaxun Liu   auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
578fc5121a7SYaxun Liu   SmallVector<Value *, 6> Args;
579fc5121a7SYaxun Liu   for (unsigned I = 0; I != PtrArgLoc; ++I)
580fc5121a7SYaxun Liu     Args.push_back(CI->getArgOperand(I));
581fc5121a7SYaxun Liu   Args.push_back(BCast);
582fc5121a7SYaxun Liu 
583fc5121a7SYaxun Liu   auto *NCI = B.CreateCall(F, Args);
584fc5121a7SYaxun Liu   NCI->setAttributes(CI->getAttributes());
585fc5121a7SYaxun Liu   CI->replaceAllUsesWith(NCI);
586fc5121a7SYaxun Liu   CI->dropAllReferences();
587fc5121a7SYaxun Liu   CI->eraseFromParent();
588fc5121a7SYaxun Liu 
589fc5121a7SYaxun Liu   return true;
590fc5121a7SYaxun Liu }
591fc5121a7SYaxun Liu 
5927f37794eSStanislav Mekhanoshin // This function returns false if no change; return true otherwise.
fold(CallInst * CI,AliasAnalysis * AA)5937f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
5947f37794eSStanislav Mekhanoshin   this->CI = CI;
5957f37794eSStanislav Mekhanoshin   Function *Callee = CI->getCalledFunction();
5967f37794eSStanislav Mekhanoshin 
5977f37794eSStanislav Mekhanoshin   // Ignore indirect calls.
5985a667c0eSKazu Hirata   if (Callee == nullptr)
5995a667c0eSKazu Hirata     return false;
6007f37794eSStanislav Mekhanoshin 
6017f37794eSStanislav Mekhanoshin   BasicBlock *BB = CI->getParent();
6027f37794eSStanislav Mekhanoshin   LLVMContext &Context = CI->getParent()->getContext();
6037f37794eSStanislav Mekhanoshin   IRBuilder<> B(Context);
6047f37794eSStanislav Mekhanoshin 
6057f37794eSStanislav Mekhanoshin   // Set the builder to the instruction after the call.
6067f37794eSStanislav Mekhanoshin   B.SetInsertPoint(BB, CI->getIterator());
6077f37794eSStanislav Mekhanoshin 
6087f37794eSStanislav Mekhanoshin   // Copy fast flags from the original call.
6097f37794eSStanislav Mekhanoshin   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
6107f37794eSStanislav Mekhanoshin     B.setFastMathFlags(FPOp->getFastMathFlags());
6117f37794eSStanislav Mekhanoshin 
612a9191c84SStanislav Mekhanoshin   switch (Callee->getIntrinsicID()) {
613a9191c84SStanislav Mekhanoshin   default:
614a9191c84SStanislav Mekhanoshin     break;
615a9191c84SStanislav Mekhanoshin   case Intrinsic::amdgcn_wavefrontsize:
616a9191c84SStanislav Mekhanoshin     return !EnablePreLink && fold_wavefrontsize(CI, B);
617a9191c84SStanislav Mekhanoshin   }
618a9191c84SStanislav Mekhanoshin 
619a9191c84SStanislav Mekhanoshin   FuncInfo FInfo;
62099a64cc9SSimon Pilgrim   if (!parseFunctionName(Callee->getName(), FInfo))
621a9191c84SStanislav Mekhanoshin     return false;
622a9191c84SStanislav Mekhanoshin 
623a9191c84SStanislav Mekhanoshin   // Further check the number of arguments to see if they match.
624c1e32b3fSKazu Hirata   if (CI->arg_size() != FInfo.getNumArgs())
625a9191c84SStanislav Mekhanoshin     return false;
626a9191c84SStanislav Mekhanoshin 
6277f37794eSStanislav Mekhanoshin   if (TDOFold(CI, FInfo))
6287f37794eSStanislav Mekhanoshin     return true;
6297f37794eSStanislav Mekhanoshin 
6307f37794eSStanislav Mekhanoshin   // Under unsafe-math, evaluate calls if possible.
6317f37794eSStanislav Mekhanoshin   // According to Brian Sumner, we can do this for all f32 function calls
6327f37794eSStanislav Mekhanoshin   // using host's double function calls.
6337f37794eSStanislav Mekhanoshin   if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
6347f37794eSStanislav Mekhanoshin     return true;
6357f37794eSStanislav Mekhanoshin 
636dc6e8dfdSJacob Lambert   // Specialized optimizations for each function call
6377f37794eSStanislav Mekhanoshin   switch (FInfo.getId()) {
6387f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
6397f37794eSStanislav Mekhanoshin     // skip vector function
6407f37794eSStanislav Mekhanoshin     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6417f37794eSStanislav Mekhanoshin              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6427f37794eSStanislav Mekhanoshin             "recip must be an either native or half function");
6437f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
6447f37794eSStanislav Mekhanoshin 
6457f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
6467f37794eSStanislav Mekhanoshin     // skip vector function
6477f37794eSStanislav Mekhanoshin     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6487f37794eSStanislav Mekhanoshin              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6497f37794eSStanislav Mekhanoshin             "divide must be an either native or half function");
6507f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
6517f37794eSStanislav Mekhanoshin 
6527f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POW:
6537f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
6547f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWN:
6557f37794eSStanislav Mekhanoshin     return fold_pow(CI, B, FInfo);
6567f37794eSStanislav Mekhanoshin 
6577f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ROOTN:
6587f37794eSStanislav Mekhanoshin     // skip vector function
6597f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
6607f37794eSStanislav Mekhanoshin 
6617f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_FMA:
6627f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_MAD:
6637f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_NFMA:
6647f37794eSStanislav Mekhanoshin     // skip vector function
6657f37794eSStanislav Mekhanoshin     return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
6667f37794eSStanislav Mekhanoshin 
6677f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
6687f37794eSStanislav Mekhanoshin     return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
6697f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
6707f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
6717f37794eSStanislav Mekhanoshin     if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
6727f37794eSStanislav Mekhanoshin          getArgType(FInfo) == AMDGPULibFunc::F64)
6737f37794eSStanislav Mekhanoshin         && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
6747f37794eSStanislav Mekhanoshin       return fold_sincos(CI, B, AA);
6757f37794eSStanislav Mekhanoshin 
6767f37794eSStanislav Mekhanoshin     break;
677fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_READ_PIPE_2:
678fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_READ_PIPE_4:
679fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_WRITE_PIPE_2:
680fc5121a7SYaxun Liu   case AMDGPULibFunc::EI_WRITE_PIPE_4:
681fc5121a7SYaxun Liu     return fold_read_write_pipe(CI, B, FInfo);
6827f37794eSStanislav Mekhanoshin 
6837f37794eSStanislav Mekhanoshin   default:
6847f37794eSStanislav Mekhanoshin     break;
6857f37794eSStanislav Mekhanoshin   }
6867f37794eSStanislav Mekhanoshin 
6877f37794eSStanislav Mekhanoshin   return false;
6887f37794eSStanislav Mekhanoshin }
6897f37794eSStanislav Mekhanoshin 
TDOFold(CallInst * CI,const FuncInfo & FInfo)6907f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
6917f37794eSStanislav Mekhanoshin   // Table-Driven optimization
6927f37794eSStanislav Mekhanoshin   const TableRef tr = getOptTable(FInfo.getId());
6936cbfffb3SKazu Hirata   if (tr.empty())
6947f37794eSStanislav Mekhanoshin     return false;
6957f37794eSStanislav Mekhanoshin 
6966cbfffb3SKazu Hirata   int const sz = (int)tr.size();
6977f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
6987f37794eSStanislav Mekhanoshin 
6997f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) > 1) {
7007f37794eSStanislav Mekhanoshin     if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
7017f37794eSStanislav Mekhanoshin       SmallVector<double, 0> DVal;
7027f37794eSStanislav Mekhanoshin       for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
7037f37794eSStanislav Mekhanoshin         ConstantFP *eltval = dyn_cast<ConstantFP>(
7047f37794eSStanislav Mekhanoshin                                CV->getElementAsConstant((unsigned)eltNo));
7057f37794eSStanislav Mekhanoshin         assert(eltval && "Non-FP arguments in math function!");
7067f37794eSStanislav Mekhanoshin         bool found = false;
7077f37794eSStanislav Mekhanoshin         for (int i=0; i < sz; ++i) {
708*ae998555SKazu Hirata           if (eltval->isExactlyValue(tr[i].input)) {
709*ae998555SKazu Hirata             DVal.push_back(tr[i].result);
7107f37794eSStanislav Mekhanoshin             found = true;
7117f37794eSStanislav Mekhanoshin             break;
7127f37794eSStanislav Mekhanoshin           }
7137f37794eSStanislav Mekhanoshin         }
7147f37794eSStanislav Mekhanoshin         if (!found) {
7157f37794eSStanislav Mekhanoshin           // This vector constants not handled yet.
7167f37794eSStanislav Mekhanoshin           return false;
7177f37794eSStanislav Mekhanoshin         }
7187f37794eSStanislav Mekhanoshin       }
7197f37794eSStanislav Mekhanoshin       LLVMContext &context = CI->getParent()->getParent()->getContext();
7207f37794eSStanislav Mekhanoshin       Constant *nval;
7217f37794eSStanislav Mekhanoshin       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
7227f37794eSStanislav Mekhanoshin         SmallVector<float, 0> FVal;
7237f37794eSStanislav Mekhanoshin         for (unsigned i = 0; i < DVal.size(); ++i) {
7247f37794eSStanislav Mekhanoshin           FVal.push_back((float)DVal[i]);
7257f37794eSStanislav Mekhanoshin         }
7267f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp(FVal);
7277f37794eSStanislav Mekhanoshin         nval = ConstantDataVector::get(context, tmp);
7287f37794eSStanislav Mekhanoshin       } else { // F64
7297f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp(DVal);
7307f37794eSStanislav Mekhanoshin         nval = ConstantDataVector::get(context, tmp);
7317f37794eSStanislav Mekhanoshin       }
732d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7337f37794eSStanislav Mekhanoshin       replaceCall(nval);
7347f37794eSStanislav Mekhanoshin       return true;
7357f37794eSStanislav Mekhanoshin     }
7367f37794eSStanislav Mekhanoshin   } else {
7377f37794eSStanislav Mekhanoshin     // Scalar version
7387f37794eSStanislav Mekhanoshin     if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
7397f37794eSStanislav Mekhanoshin       for (int i = 0; i < sz; ++i) {
740*ae998555SKazu Hirata         if (CF->isExactlyValue(tr[i].input)) {
741*ae998555SKazu Hirata           Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
742d34e60caSNicola Zaghen           LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7437f37794eSStanislav Mekhanoshin           replaceCall(nval);
7447f37794eSStanislav Mekhanoshin           return true;
7457f37794eSStanislav Mekhanoshin         }
7467f37794eSStanislav Mekhanoshin       }
7477f37794eSStanislav Mekhanoshin     }
7487f37794eSStanislav Mekhanoshin   }
7497f37794eSStanislav Mekhanoshin 
7507f37794eSStanislav Mekhanoshin   return false;
7517f37794eSStanislav Mekhanoshin }
7527f37794eSStanislav Mekhanoshin 
7537f37794eSStanislav Mekhanoshin //  [native_]half_recip(c) ==> 1.0/c
fold_recip(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)7547f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
7557f37794eSStanislav Mekhanoshin                                 const FuncInfo &FInfo) {
7567f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
7577f37794eSStanislav Mekhanoshin   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
7587f37794eSStanislav Mekhanoshin     // Just create a normal div. Later, InstCombine will be able
7597f37794eSStanislav Mekhanoshin     // to compute the divide into a constant (avoid check float infinity
7607f37794eSStanislav Mekhanoshin     // or subnormal at this point).
7617f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
7627f37794eSStanislav Mekhanoshin                                opr0,
7637f37794eSStanislav Mekhanoshin                                "recip2div");
764d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7657f37794eSStanislav Mekhanoshin     replaceCall(nval);
7667f37794eSStanislav Mekhanoshin     return true;
7677f37794eSStanislav Mekhanoshin   }
7687f37794eSStanislav Mekhanoshin   return false;
7697f37794eSStanislav Mekhanoshin }
7707f37794eSStanislav Mekhanoshin 
7717f37794eSStanislav Mekhanoshin //  [native_]half_divide(x, c) ==> x/c
fold_divide(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)7727f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
7737f37794eSStanislav Mekhanoshin                                  const FuncInfo &FInfo) {
7747f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
7757f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
7767f37794eSStanislav Mekhanoshin   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
7777f37794eSStanislav Mekhanoshin   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
7787f37794eSStanislav Mekhanoshin 
7797f37794eSStanislav Mekhanoshin   if ((CF0 && CF1) ||  // both are constants
7807f37794eSStanislav Mekhanoshin       (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
7817f37794eSStanislav Mekhanoshin       // CF1 is constant && f32 divide
7827f37794eSStanislav Mekhanoshin   {
7837f37794eSStanislav Mekhanoshin     Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
7847f37794eSStanislav Mekhanoshin                                 opr1, "__div2recip");
7857f37794eSStanislav Mekhanoshin     Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
7867f37794eSStanislav Mekhanoshin     replaceCall(nval);
7877f37794eSStanislav Mekhanoshin     return true;
7887f37794eSStanislav Mekhanoshin   }
7897f37794eSStanislav Mekhanoshin   return false;
7907f37794eSStanislav Mekhanoshin }
7917f37794eSStanislav Mekhanoshin 
namespace llvm {
// Base-2 logarithm evaluated on the host. Uses the C library's log2 when the
// feature-test macros advertise it and falls back to a change of base from
// the natural logarithm otherwise.
static double log2(double V) {
#if !(_XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L)
  // log2(V) = ln(V) / ln(2)
  return log(V) / numbers::ln2;
#else
  return ::log2(V);
#endif
}
} // namespace llvm
8017f37794eSStanislav Mekhanoshin 
fold_pow(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8027f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
8037f37794eSStanislav Mekhanoshin                               const FuncInfo &FInfo) {
8047f37794eSStanislav Mekhanoshin   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
8057f37794eSStanislav Mekhanoshin           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
8067f37794eSStanislav Mekhanoshin           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
8077f37794eSStanislav Mekhanoshin          "fold_pow: encounter a wrong function call");
8087f37794eSStanislav Mekhanoshin 
8097f37794eSStanislav Mekhanoshin   Value *opr0, *opr1;
8107f37794eSStanislav Mekhanoshin   ConstantFP *CF;
8117f37794eSStanislav Mekhanoshin   ConstantInt *CINT;
8127f37794eSStanislav Mekhanoshin   ConstantAggregateZero *CZero;
8137f37794eSStanislav Mekhanoshin   Type *eltType;
8147f37794eSStanislav Mekhanoshin 
8157f37794eSStanislav Mekhanoshin   opr0 = CI->getArgOperand(0);
8167f37794eSStanislav Mekhanoshin   opr1 = CI->getArgOperand(1);
8177f37794eSStanislav Mekhanoshin   CZero = dyn_cast<ConstantAggregateZero>(opr1);
8187f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
8197f37794eSStanislav Mekhanoshin     eltType = opr0->getType();
8207f37794eSStanislav Mekhanoshin     CF = dyn_cast<ConstantFP>(opr1);
8217f37794eSStanislav Mekhanoshin     CINT = dyn_cast<ConstantInt>(opr1);
8227f37794eSStanislav Mekhanoshin   } else {
8237f37794eSStanislav Mekhanoshin     VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
8247f37794eSStanislav Mekhanoshin     assert(VTy && "Oprand of vector function should be of vectortype");
8257f37794eSStanislav Mekhanoshin     eltType = VTy->getElementType();
8267f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
8277f37794eSStanislav Mekhanoshin 
8287f37794eSStanislav Mekhanoshin     // Now, only Handle vector const whose elements have the same value.
8297f37794eSStanislav Mekhanoshin     CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
8307f37794eSStanislav Mekhanoshin     CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
8317f37794eSStanislav Mekhanoshin   }
8327f37794eSStanislav Mekhanoshin 
8337f37794eSStanislav Mekhanoshin   // No unsafe math , no constant argument, do nothing
8347f37794eSStanislav Mekhanoshin   if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
8357f37794eSStanislav Mekhanoshin     return false;
8367f37794eSStanislav Mekhanoshin 
8377f37794eSStanislav Mekhanoshin   // 0x1111111 means that we don't do anything for this call.
8387f37794eSStanislav Mekhanoshin   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
8397f37794eSStanislav Mekhanoshin 
8407f37794eSStanislav Mekhanoshin   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
8417f37794eSStanislav Mekhanoshin     //  pow/powr/pown(x, 0) == 1
842d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
8437f37794eSStanislav Mekhanoshin     Constant *cnval = ConstantFP::get(eltType, 1.0);
8447f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) > 1) {
8457f37794eSStanislav Mekhanoshin       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
8467f37794eSStanislav Mekhanoshin     }
8477f37794eSStanislav Mekhanoshin     replaceCall(cnval);
8487f37794eSStanislav Mekhanoshin     return true;
8497f37794eSStanislav Mekhanoshin   }
8507f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
8517f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, 1.0) = x
852d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
8537f37794eSStanislav Mekhanoshin     replaceCall(opr0);
8547f37794eSStanislav Mekhanoshin     return true;
8557f37794eSStanislav Mekhanoshin   }
8567f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
8577f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, 2.0) = x*x
858d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
859d34e60caSNicola Zaghen                       << "\n");
8607f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
8617f37794eSStanislav Mekhanoshin     replaceCall(nval);
8627f37794eSStanislav Mekhanoshin     return true;
8637f37794eSStanislav Mekhanoshin   }
8647f37794eSStanislav Mekhanoshin   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
8657f37794eSStanislav Mekhanoshin     // pow/powr/pown(x, -1.0) = 1.0/x
866d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
8677f37794eSStanislav Mekhanoshin     Constant *cnval = ConstantFP::get(eltType, 1.0);
8687f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) > 1) {
8697f37794eSStanislav Mekhanoshin       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
8707f37794eSStanislav Mekhanoshin     }
8717f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
8727f37794eSStanislav Mekhanoshin     replaceCall(nval);
8737f37794eSStanislav Mekhanoshin     return true;
8747f37794eSStanislav Mekhanoshin   }
8757f37794eSStanislav Mekhanoshin 
8767f37794eSStanislav Mekhanoshin   Module *M = CI->getModule();
8777f37794eSStanislav Mekhanoshin   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
8787f37794eSStanislav Mekhanoshin     // pow[r](x, [-]0.5) = sqrt(x)
8797f37794eSStanislav Mekhanoshin     bool issqrt = CF->isExactlyValue(0.5);
88013680223SJames Y Knight     if (FunctionCallee FPExpr =
88113680223SJames Y Knight             getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
88213680223SJames Y Knight                                                 : AMDGPULibFunc::EI_RSQRT,
88313680223SJames Y Knight                                          FInfo))) {
884d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
8857f37794eSStanislav Mekhanoshin                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
8867f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
8877f37794eSStanislav Mekhanoshin                                                         : "__pow2rsqrt");
8887f37794eSStanislav Mekhanoshin       replaceCall(nval);
8897f37794eSStanislav Mekhanoshin       return true;
8907f37794eSStanislav Mekhanoshin     }
8917f37794eSStanislav Mekhanoshin   }
8927f37794eSStanislav Mekhanoshin 
8937f37794eSStanislav Mekhanoshin   if (!isUnsafeMath(CI))
8947f37794eSStanislav Mekhanoshin     return false;
8957f37794eSStanislav Mekhanoshin 
8967f37794eSStanislav Mekhanoshin   // Unsafe Math optimization
8977f37794eSStanislav Mekhanoshin 
8987f37794eSStanislav Mekhanoshin   // Remember that ci_opr1 is set if opr1 is integral
8997f37794eSStanislav Mekhanoshin   if (CF) {
9007f37794eSStanislav Mekhanoshin     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9017f37794eSStanislav Mekhanoshin                     ? (double)CF->getValueAPF().convertToFloat()
9027f37794eSStanislav Mekhanoshin                     : CF->getValueAPF().convertToDouble();
9037f37794eSStanislav Mekhanoshin     int ival = (int)dval;
9047f37794eSStanislav Mekhanoshin     if ((double)ival == dval) {
9057f37794eSStanislav Mekhanoshin       ci_opr1 = ival;
9067f37794eSStanislav Mekhanoshin     } else
9077f37794eSStanislav Mekhanoshin       ci_opr1 = 0x11111111;
9087f37794eSStanislav Mekhanoshin   }
9097f37794eSStanislav Mekhanoshin 
9107f37794eSStanislav Mekhanoshin   // pow/powr/pown(x, c) = [1/](x*x*..x); where
9117f37794eSStanislav Mekhanoshin   //   trunc(c) == c && the number of x == c && |c| <= 12
9127f37794eSStanislav Mekhanoshin   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9137f37794eSStanislav Mekhanoshin   if (abs_opr1 <= 12) {
9147f37794eSStanislav Mekhanoshin     Constant *cnval;
9157f37794eSStanislav Mekhanoshin     Value *nval;
9167f37794eSStanislav Mekhanoshin     if (abs_opr1 == 0) {
9177f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, 1.0);
9187f37794eSStanislav Mekhanoshin       if (getVecSize(FInfo) > 1) {
9197f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9207f37794eSStanislav Mekhanoshin       }
9217f37794eSStanislav Mekhanoshin       nval = cnval;
9227f37794eSStanislav Mekhanoshin     } else {
9237f37794eSStanislav Mekhanoshin       Value *valx2 = nullptr;
9247f37794eSStanislav Mekhanoshin       nval = nullptr;
9257f37794eSStanislav Mekhanoshin       while (abs_opr1 > 0) {
9267f37794eSStanislav Mekhanoshin         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
9277f37794eSStanislav Mekhanoshin         if (abs_opr1 & 1) {
9287f37794eSStanislav Mekhanoshin           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
9297f37794eSStanislav Mekhanoshin         }
9307f37794eSStanislav Mekhanoshin         abs_opr1 >>= 1;
9317f37794eSStanislav Mekhanoshin       }
9327f37794eSStanislav Mekhanoshin     }
9337f37794eSStanislav Mekhanoshin 
9347f37794eSStanislav Mekhanoshin     if (ci_opr1 < 0) {
9357f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, 1.0);
9367f37794eSStanislav Mekhanoshin       if (getVecSize(FInfo) > 1) {
9377f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9387f37794eSStanislav Mekhanoshin       }
9397f37794eSStanislav Mekhanoshin       nval = B.CreateFDiv(cnval, nval, "__1powprod");
9407f37794eSStanislav Mekhanoshin     }
941d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
942d34e60caSNicola Zaghen                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
943d34e60caSNicola Zaghen                       << ")\n");
9447f37794eSStanislav Mekhanoshin     replaceCall(nval);
9457f37794eSStanislav Mekhanoshin     return true;
9467f37794eSStanislav Mekhanoshin   }
9477f37794eSStanislav Mekhanoshin 
9487f37794eSStanislav Mekhanoshin   // powr ---> exp2(y * log2(x))
9497f37794eSStanislav Mekhanoshin   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
95013680223SJames Y Knight   FunctionCallee ExpExpr =
95113680223SJames Y Knight       getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
9527f37794eSStanislav Mekhanoshin   if (!ExpExpr)
9537f37794eSStanislav Mekhanoshin     return false;
9547f37794eSStanislav Mekhanoshin 
9557f37794eSStanislav Mekhanoshin   bool needlog = false;
9567f37794eSStanislav Mekhanoshin   bool needabs = false;
9577f37794eSStanislav Mekhanoshin   bool needcopysign = false;
9587f37794eSStanislav Mekhanoshin   Constant *cnval = nullptr;
9597f37794eSStanislav Mekhanoshin   if (getVecSize(FInfo) == 1) {
9607f37794eSStanislav Mekhanoshin     CF = dyn_cast<ConstantFP>(opr0);
9617f37794eSStanislav Mekhanoshin 
9627f37794eSStanislav Mekhanoshin     if (CF) {
9637f37794eSStanislav Mekhanoshin       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
9647f37794eSStanislav Mekhanoshin                    ? (double)CF->getValueAPF().convertToFloat()
9657f37794eSStanislav Mekhanoshin                    : CF->getValueAPF().convertToDouble();
9667f37794eSStanislav Mekhanoshin 
9677f37794eSStanislav Mekhanoshin       V = log2(std::abs(V));
9687f37794eSStanislav Mekhanoshin       cnval = ConstantFP::get(eltType, V);
9697f37794eSStanislav Mekhanoshin       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
9707f37794eSStanislav Mekhanoshin                      CF->isNegative();
9717f37794eSStanislav Mekhanoshin     } else {
9727f37794eSStanislav Mekhanoshin       needlog = true;
9737f37794eSStanislav Mekhanoshin       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
9747f37794eSStanislav Mekhanoshin                                (!CF || CF->isNegative());
9757f37794eSStanislav Mekhanoshin     }
9767f37794eSStanislav Mekhanoshin   } else {
9777f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
9787f37794eSStanislav Mekhanoshin 
9797f37794eSStanislav Mekhanoshin     if (!CDV) {
9807f37794eSStanislav Mekhanoshin       needlog = true;
9817f37794eSStanislav Mekhanoshin       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
9827f37794eSStanislav Mekhanoshin     } else {
9837f37794eSStanislav Mekhanoshin       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
9847f37794eSStanislav Mekhanoshin               "Wrong vector size detected");
9857f37794eSStanislav Mekhanoshin 
9867f37794eSStanislav Mekhanoshin       SmallVector<double, 0> DVal;
9877f37794eSStanislav Mekhanoshin       for (int i=0; i < getVecSize(FInfo); ++i) {
9887f37794eSStanislav Mekhanoshin         double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
9897f37794eSStanislav Mekhanoshin                      ? (double)CDV->getElementAsFloat(i)
9907f37794eSStanislav Mekhanoshin                      : CDV->getElementAsDouble(i);
9917f37794eSStanislav Mekhanoshin         if (V < 0.0) needcopysign = true;
9927f37794eSStanislav Mekhanoshin         V = log2(std::abs(V));
9937f37794eSStanislav Mekhanoshin         DVal.push_back(V);
9947f37794eSStanislav Mekhanoshin       }
9957f37794eSStanislav Mekhanoshin       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
9967f37794eSStanislav Mekhanoshin         SmallVector<float, 0> FVal;
9977f37794eSStanislav Mekhanoshin         for (unsigned i=0; i < DVal.size(); ++i) {
9987f37794eSStanislav Mekhanoshin           FVal.push_back((float)DVal[i]);
9997f37794eSStanislav Mekhanoshin         }
10007f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp(FVal);
10017f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::get(M->getContext(), tmp);
10027f37794eSStanislav Mekhanoshin       } else {
10037f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp(DVal);
10047f37794eSStanislav Mekhanoshin         cnval = ConstantDataVector::get(M->getContext(), tmp);
10057f37794eSStanislav Mekhanoshin       }
10067f37794eSStanislav Mekhanoshin     }
10077f37794eSStanislav Mekhanoshin   }
10087f37794eSStanislav Mekhanoshin 
10097f37794eSStanislav Mekhanoshin   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10107f37794eSStanislav Mekhanoshin     // We cannot handle corner cases for a general pow() function, give up
10117f37794eSStanislav Mekhanoshin     // unless y is a constant integral value. Then proceed as if it were pown.
10127f37794eSStanislav Mekhanoshin     if (getVecSize(FInfo) == 1) {
10137f37794eSStanislav Mekhanoshin       if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
10147f37794eSStanislav Mekhanoshin         double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10157f37794eSStanislav Mekhanoshin                    ? (double)CF->getValueAPF().convertToFloat()
10167f37794eSStanislav Mekhanoshin                    : CF->getValueAPF().convertToDouble();
10177f37794eSStanislav Mekhanoshin         if (y != (double)(int64_t)y)
10187f37794eSStanislav Mekhanoshin           return false;
10197f37794eSStanislav Mekhanoshin       } else
10207f37794eSStanislav Mekhanoshin         return false;
10217f37794eSStanislav Mekhanoshin     } else {
10227f37794eSStanislav Mekhanoshin       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
10237f37794eSStanislav Mekhanoshin         for (int i=0; i < getVecSize(FInfo); ++i) {
10247f37794eSStanislav Mekhanoshin           double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10257f37794eSStanislav Mekhanoshin                      ? (double)CDV->getElementAsFloat(i)
10267f37794eSStanislav Mekhanoshin                      : CDV->getElementAsDouble(i);
10277f37794eSStanislav Mekhanoshin           if (y != (double)(int64_t)y)
10287f37794eSStanislav Mekhanoshin             return false;
10297f37794eSStanislav Mekhanoshin         }
10307f37794eSStanislav Mekhanoshin       } else
10317f37794eSStanislav Mekhanoshin         return false;
10327f37794eSStanislav Mekhanoshin     }
10337f37794eSStanislav Mekhanoshin   }
10347f37794eSStanislav Mekhanoshin 
10357f37794eSStanislav Mekhanoshin   Value *nval;
10367f37794eSStanislav Mekhanoshin   if (needabs) {
103713680223SJames Y Knight     FunctionCallee AbsExpr =
103813680223SJames Y Knight         getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
10397f37794eSStanislav Mekhanoshin     if (!AbsExpr)
10407f37794eSStanislav Mekhanoshin       return false;
10417f37794eSStanislav Mekhanoshin     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
10427f37794eSStanislav Mekhanoshin   } else {
10437f37794eSStanislav Mekhanoshin     nval = cnval ? cnval : opr0;
10447f37794eSStanislav Mekhanoshin   }
10457f37794eSStanislav Mekhanoshin   if (needlog) {
104613680223SJames Y Knight     FunctionCallee LogExpr =
104713680223SJames Y Knight         getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
10487f37794eSStanislav Mekhanoshin     if (!LogExpr)
10497f37794eSStanislav Mekhanoshin       return false;
10507f37794eSStanislav Mekhanoshin     nval = CreateCallEx(B,LogExpr, nval, "__log2");
10517f37794eSStanislav Mekhanoshin   }
10527f37794eSStanislav Mekhanoshin 
10537f37794eSStanislav Mekhanoshin   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
10547f37794eSStanislav Mekhanoshin     // convert int(32) to fp(f32 or f64)
10557f37794eSStanislav Mekhanoshin     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
10567f37794eSStanislav Mekhanoshin   }
10577f37794eSStanislav Mekhanoshin   nval = B.CreateFMul(opr1, nval, "__ylogx");
10587f37794eSStanislav Mekhanoshin   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
10597f37794eSStanislav Mekhanoshin 
10607f37794eSStanislav Mekhanoshin   if (needcopysign) {
10617f37794eSStanislav Mekhanoshin     Value *opr_n;
10627f37794eSStanislav Mekhanoshin     Type* rTy = opr0->getType();
10637f37794eSStanislav Mekhanoshin     Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
10647f37794eSStanislav Mekhanoshin     Type *nTy = nTyS;
10653254a001SChristopher Tetreault     if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
10663254a001SChristopher Tetreault       nTy = FixedVectorType::get(nTyS, vTy);
10677f37794eSStanislav Mekhanoshin     unsigned size = nTy->getScalarSizeInBits();
10687f37794eSStanislav Mekhanoshin     opr_n = CI->getArgOperand(1);
10697f37794eSStanislav Mekhanoshin     if (opr_n->getType()->isIntegerTy())
10707f37794eSStanislav Mekhanoshin       opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
10717f37794eSStanislav Mekhanoshin     else
10727f37794eSStanislav Mekhanoshin       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
10737f37794eSStanislav Mekhanoshin 
10747f37794eSStanislav Mekhanoshin     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
10757f37794eSStanislav Mekhanoshin     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
10767f37794eSStanislav Mekhanoshin     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
10777f37794eSStanislav Mekhanoshin     nval = B.CreateBitCast(nval, opr0->getType());
10787f37794eSStanislav Mekhanoshin   }
10797f37794eSStanislav Mekhanoshin 
1080d34e60caSNicola Zaghen   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
10817f37794eSStanislav Mekhanoshin                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
10827f37794eSStanislav Mekhanoshin   replaceCall(nval);
10837f37794eSStanislav Mekhanoshin 
10847f37794eSStanislav Mekhanoshin   return true;
10857f37794eSStanislav Mekhanoshin }
10867f37794eSStanislav Mekhanoshin 
fold_rootn(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)10877f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
10887f37794eSStanislav Mekhanoshin                                 const FuncInfo &FInfo) {
10897f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
10907f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
10917f37794eSStanislav Mekhanoshin 
10927f37794eSStanislav Mekhanoshin   ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
10937f37794eSStanislav Mekhanoshin   if (!CINT) {
10947f37794eSStanislav Mekhanoshin     return false;
10957f37794eSStanislav Mekhanoshin   }
10967f37794eSStanislav Mekhanoshin   int ci_opr1 = (int)CINT->getSExtValue();
10977f37794eSStanislav Mekhanoshin   if (ci_opr1 == 1) {  // rootn(x, 1) = x
1098d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
10997f37794eSStanislav Mekhanoshin     replaceCall(opr0);
11007f37794eSStanislav Mekhanoshin     return true;
11017f37794eSStanislav Mekhanoshin   }
11027f37794eSStanislav Mekhanoshin   if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
11037f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
110413680223SJames Y Knight     if (FunctionCallee FPExpr =
110513680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1106d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
11077f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
11087f37794eSStanislav Mekhanoshin       replaceCall(nval);
11097f37794eSStanislav Mekhanoshin       return true;
11107f37794eSStanislav Mekhanoshin     }
11117f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11127f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
111313680223SJames Y Knight     if (FunctionCallee FPExpr =
111413680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1115d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
11167f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
11177f37794eSStanislav Mekhanoshin       replaceCall(nval);
11187f37794eSStanislav Mekhanoshin       return true;
11197f37794eSStanislav Mekhanoshin     }
11207f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1121d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
11227f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
11237f37794eSStanislav Mekhanoshin                                opr0,
11247f37794eSStanislav Mekhanoshin                                "__rootn2div");
11257f37794eSStanislav Mekhanoshin     replaceCall(nval);
11267f37794eSStanislav Mekhanoshin     return true;
11277f37794eSStanislav Mekhanoshin   } else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
11287f37794eSStanislav Mekhanoshin     Module *M = CI->getModule();
112913680223SJames Y Knight     if (FunctionCallee FPExpr =
113013680223SJames Y Knight             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1131d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1132d34e60caSNicola Zaghen                         << ")\n");
11337f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
11347f37794eSStanislav Mekhanoshin       replaceCall(nval);
11357f37794eSStanislav Mekhanoshin       return true;
11367f37794eSStanislav Mekhanoshin     }
11377f37794eSStanislav Mekhanoshin   }
11387f37794eSStanislav Mekhanoshin   return false;
11397f37794eSStanislav Mekhanoshin }
11407f37794eSStanislav Mekhanoshin 
fold_fma_mad(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)11417f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
11427f37794eSStanislav Mekhanoshin                                   const FuncInfo &FInfo) {
11437f37794eSStanislav Mekhanoshin   Value *opr0 = CI->getArgOperand(0);
11447f37794eSStanislav Mekhanoshin   Value *opr1 = CI->getArgOperand(1);
11457f37794eSStanislav Mekhanoshin   Value *opr2 = CI->getArgOperand(2);
11467f37794eSStanislav Mekhanoshin 
11477f37794eSStanislav Mekhanoshin   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
11487f37794eSStanislav Mekhanoshin   ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
11497f37794eSStanislav Mekhanoshin   if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
11507f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = c if a=0 || b=0
1151d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
11527f37794eSStanislav Mekhanoshin     replaceCall(opr2);
11537f37794eSStanislav Mekhanoshin     return true;
11547f37794eSStanislav Mekhanoshin   }
11557f37794eSStanislav Mekhanoshin   if (CF0 && CF0->isExactlyValue(1.0f)) {
11567f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = b+c if a=1
1157d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1158d34e60caSNicola Zaghen                       << "\n");
11597f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
11607f37794eSStanislav Mekhanoshin     replaceCall(nval);
11617f37794eSStanislav Mekhanoshin     return true;
11627f37794eSStanislav Mekhanoshin   }
11637f37794eSStanislav Mekhanoshin   if (CF1 && CF1->isExactlyValue(1.0f)) {
11647f37794eSStanislav Mekhanoshin     // fma/mad(a, b, c) = a+c if b=1
1165d34e60caSNicola Zaghen     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1166d34e60caSNicola Zaghen                       << "\n");
11677f37794eSStanislav Mekhanoshin     Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
11687f37794eSStanislav Mekhanoshin     replaceCall(nval);
11697f37794eSStanislav Mekhanoshin     return true;
11707f37794eSStanislav Mekhanoshin   }
11717f37794eSStanislav Mekhanoshin   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
11727f37794eSStanislav Mekhanoshin     if (CF->isZero()) {
11737f37794eSStanislav Mekhanoshin       // fma/mad(a, b, c) = a*b if c=0
1174d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1175d34e60caSNicola Zaghen                         << *opr1 << "\n");
11767f37794eSStanislav Mekhanoshin       Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
11777f37794eSStanislav Mekhanoshin       replaceCall(nval);
11787f37794eSStanislav Mekhanoshin       return true;
11797f37794eSStanislav Mekhanoshin     }
11807f37794eSStanislav Mekhanoshin   }
11817f37794eSStanislav Mekhanoshin 
11827f37794eSStanislav Mekhanoshin   return false;
11837f37794eSStanislav Mekhanoshin }
11847f37794eSStanislav Mekhanoshin 
1185dc6e8dfdSJacob Lambert // Get a scalar native builtin single argument FP function
getNativeFunction(Module * M,const FuncInfo & FInfo)118613680223SJames Y Knight FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
118713680223SJames Y Knight                                                  const FuncInfo &FInfo) {
1188312c557bSStanislav Mekhanoshin   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1189312c557bSStanislav Mekhanoshin     return nullptr;
11907f37794eSStanislav Mekhanoshin   FuncInfo nf = FInfo;
11917f37794eSStanislav Mekhanoshin   nf.setPrefix(AMDGPULibFunc::NATIVE);
11927f37794eSStanislav Mekhanoshin   return getFunction(M, nf);
11937f37794eSStanislav Mekhanoshin }
11947f37794eSStanislav Mekhanoshin 
11957f37794eSStanislav Mekhanoshin // fold sqrt -> native_sqrt (x)
fold_sqrt(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)11967f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
11977f37794eSStanislav Mekhanoshin                                const FuncInfo &FInfo) {
1198312c557bSStanislav Mekhanoshin   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
11997f37794eSStanislav Mekhanoshin       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
120013680223SJames Y Knight     if (FunctionCallee FPExpr = getNativeFunction(
12017f37794eSStanislav Mekhanoshin             CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
12027f37794eSStanislav Mekhanoshin       Value *opr0 = CI->getArgOperand(0);
1203d34e60caSNicola Zaghen       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
12047f37794eSStanislav Mekhanoshin                         << "sqrt(" << *opr0 << ")\n");
12057f37794eSStanislav Mekhanoshin       Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
12067f37794eSStanislav Mekhanoshin       replaceCall(nval);
12077f37794eSStanislav Mekhanoshin       return true;
12087f37794eSStanislav Mekhanoshin     }
12097f37794eSStanislav Mekhanoshin   }
12107f37794eSStanislav Mekhanoshin   return false;
12117f37794eSStanislav Mekhanoshin }
12127f37794eSStanislav Mekhanoshin 
12137f37794eSStanislav Mekhanoshin // fold sin, cos -> sincos.
fold_sincos(CallInst * CI,IRBuilder<> & B,AliasAnalysis * AA)12147f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
12157f37794eSStanislav Mekhanoshin                                  AliasAnalysis *AA) {
12167f37794eSStanislav Mekhanoshin   AMDGPULibFunc fInfo;
12177f37794eSStanislav Mekhanoshin   if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
12187f37794eSStanislav Mekhanoshin     return false;
12197f37794eSStanislav Mekhanoshin 
12207f37794eSStanislav Mekhanoshin   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
12217f37794eSStanislav Mekhanoshin          fInfo.getId() == AMDGPULibFunc::EI_COS);
12227f37794eSStanislav Mekhanoshin   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
12237f37794eSStanislav Mekhanoshin 
12247f37794eSStanislav Mekhanoshin   Value *CArgVal = CI->getArgOperand(0);
12257f37794eSStanislav Mekhanoshin   BasicBlock * const CBB = CI->getParent();
12267f37794eSStanislav Mekhanoshin 
12277f37794eSStanislav Mekhanoshin   int const MaxScan = 30;
1228c9821cecSStanislav Mekhanoshin   bool Changed = false;
12297f37794eSStanislav Mekhanoshin 
12307f37794eSStanislav Mekhanoshin   { // fold in load value.
12317f37794eSStanislav Mekhanoshin     LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
12327f37794eSStanislav Mekhanoshin     if (LI && LI->getParent() == CBB) {
12337f37794eSStanislav Mekhanoshin       BasicBlock::iterator BBI = LI->getIterator();
12347f37794eSStanislav Mekhanoshin       Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
12357f37794eSStanislav Mekhanoshin       if (AvailableVal) {
1236c9821cecSStanislav Mekhanoshin         Changed = true;
12377f37794eSStanislav Mekhanoshin         CArgVal->replaceAllUsesWith(AvailableVal);
12387f37794eSStanislav Mekhanoshin         if (CArgVal->getNumUses() == 0)
12397f37794eSStanislav Mekhanoshin           LI->eraseFromParent();
12407f37794eSStanislav Mekhanoshin         CArgVal = CI->getArgOperand(0);
12417f37794eSStanislav Mekhanoshin       }
12427f37794eSStanislav Mekhanoshin     }
12437f37794eSStanislav Mekhanoshin   }
12447f37794eSStanislav Mekhanoshin 
12457f37794eSStanislav Mekhanoshin   Module *M = CI->getModule();
12467f37794eSStanislav Mekhanoshin   fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
12477f37794eSStanislav Mekhanoshin   std::string const PairName = fInfo.mangle();
12487f37794eSStanislav Mekhanoshin 
12497f37794eSStanislav Mekhanoshin   CallInst *UI = nullptr;
12507f37794eSStanislav Mekhanoshin   for (User* U : CArgVal->users()) {
12517f37794eSStanislav Mekhanoshin     CallInst *XI = dyn_cast_or_null<CallInst>(U);
12527f37794eSStanislav Mekhanoshin     if (!XI || XI == CI || XI->getParent() != CBB)
12537f37794eSStanislav Mekhanoshin       continue;
12547f37794eSStanislav Mekhanoshin 
12557f37794eSStanislav Mekhanoshin     Function *UCallee = XI->getCalledFunction();
12567f37794eSStanislav Mekhanoshin     if (!UCallee || !UCallee->getName().equals(PairName))
12577f37794eSStanislav Mekhanoshin       continue;
12587f37794eSStanislav Mekhanoshin 
12597f37794eSStanislav Mekhanoshin     BasicBlock::iterator BBI = CI->getIterator();
12607f37794eSStanislav Mekhanoshin     if (BBI == CI->getParent()->begin())
12617f37794eSStanislav Mekhanoshin       break;
12627f37794eSStanislav Mekhanoshin     --BBI;
12637f37794eSStanislav Mekhanoshin     for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
12647f37794eSStanislav Mekhanoshin       if (cast<Instruction>(BBI) == XI) {
12657f37794eSStanislav Mekhanoshin         UI = XI;
12667f37794eSStanislav Mekhanoshin         break;
12677f37794eSStanislav Mekhanoshin       }
12687f37794eSStanislav Mekhanoshin     }
12697f37794eSStanislav Mekhanoshin     if (UI) break;
12707f37794eSStanislav Mekhanoshin   }
12717f37794eSStanislav Mekhanoshin 
1272c9821cecSStanislav Mekhanoshin   if (!UI)
1273c9821cecSStanislav Mekhanoshin     return Changed;
12747f37794eSStanislav Mekhanoshin 
12757f37794eSStanislav Mekhanoshin   // Merge the sin and cos.
12767f37794eSStanislav Mekhanoshin 
12777f37794eSStanislav Mekhanoshin   // for OpenCL 2.0 we have only generic implementation of sincos
12787f37794eSStanislav Mekhanoshin   // function.
12797f37794eSStanislav Mekhanoshin   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
12800da6350dSMatt Arsenault   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
128113680223SJames Y Knight   FunctionCallee Fsincos = getFunction(M, nf);
1282c9821cecSStanislav Mekhanoshin   if (!Fsincos)
1283c9821cecSStanislav Mekhanoshin     return Changed;
12847f37794eSStanislav Mekhanoshin 
12857f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItOld = B.GetInsertPoint();
12867f37794eSStanislav Mekhanoshin   AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
12877f37794eSStanislav Mekhanoshin   B.SetInsertPoint(UI);
12887f37794eSStanislav Mekhanoshin 
12897f37794eSStanislav Mekhanoshin   Value *P = Alloc;
129013680223SJames Y Knight   Type *PTy = Fsincos.getFunctionType()->getParamType(1);
12917f37794eSStanislav Mekhanoshin   // The allocaInst allocates the memory in private address space. This need
12927f37794eSStanislav Mekhanoshin   // to be bitcasted to point to the address space of cos pointer type.
12937f37794eSStanislav Mekhanoshin   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
12940da6350dSMatt Arsenault   if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
12957f37794eSStanislav Mekhanoshin     P = B.CreateAddrSpaceCast(Alloc, PTy);
12967f37794eSStanislav Mekhanoshin   CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
12977f37794eSStanislav Mekhanoshin 
1298d34e60caSNicola Zaghen   LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1299d34e60caSNicola Zaghen                     << *Call << "\n");
13007f37794eSStanislav Mekhanoshin 
13017f37794eSStanislav Mekhanoshin   if (!isSin) { // CI->cos, UI->sin
13027f37794eSStanislav Mekhanoshin     B.SetInsertPoint(&*ItOld);
13037f37794eSStanislav Mekhanoshin     UI->replaceAllUsesWith(&*Call);
130414359ef1SJames Y Knight     Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13057f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(Reload);
13067f37794eSStanislav Mekhanoshin     UI->eraseFromParent();
13077f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
13087f37794eSStanislav Mekhanoshin   } else { // CI->sin, UI->cos
130914359ef1SJames Y Knight     Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13107f37794eSStanislav Mekhanoshin     UI->replaceAllUsesWith(Reload);
13117f37794eSStanislav Mekhanoshin     CI->replaceAllUsesWith(Call);
13127f37794eSStanislav Mekhanoshin     UI->eraseFromParent();
13137f37794eSStanislav Mekhanoshin     CI->eraseFromParent();
13147f37794eSStanislav Mekhanoshin   }
13157f37794eSStanislav Mekhanoshin   return true;
13167f37794eSStanislav Mekhanoshin }
13177f37794eSStanislav Mekhanoshin 
fold_wavefrontsize(CallInst * CI,IRBuilder<> & B)1318a9191c84SStanislav Mekhanoshin bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1319a9191c84SStanislav Mekhanoshin   if (!TM)
1320a9191c84SStanislav Mekhanoshin     return false;
1321a9191c84SStanislav Mekhanoshin 
1322a9191c84SStanislav Mekhanoshin   StringRef CPU = TM->getTargetCPU();
1323a9191c84SStanislav Mekhanoshin   StringRef Features = TM->getTargetFeatureString();
132442f74e82SMartin Storsjö   if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
13256fe949c4SKazu Hirata       (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1326a9191c84SStanislav Mekhanoshin     return false;
1327a9191c84SStanislav Mekhanoshin 
1328a9191c84SStanislav Mekhanoshin   Function *F = CI->getParent()->getParent();
1329a9191c84SStanislav Mekhanoshin   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1330a9191c84SStanislav Mekhanoshin   unsigned N = ST.getWavefrontSize();
1331a9191c84SStanislav Mekhanoshin 
1332a9191c84SStanislav Mekhanoshin   LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1333a9191c84SStanislav Mekhanoshin                << N << "\n");
1334a9191c84SStanislav Mekhanoshin 
1335a9191c84SStanislav Mekhanoshin   CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1336a9191c84SStanislav Mekhanoshin   CI->eraseFromParent();
1337a9191c84SStanislav Mekhanoshin   return true;
1338a9191c84SStanislav Mekhanoshin }
1339a9191c84SStanislav Mekhanoshin 
13407f37794eSStanislav Mekhanoshin // Get insertion point at entry.
getEntryIns(CallInst * UI)13417f37794eSStanislav Mekhanoshin BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
13427f37794eSStanislav Mekhanoshin   Function * Func = UI->getParent()->getParent();
13437f37794eSStanislav Mekhanoshin   BasicBlock * BB = &Func->getEntryBlock();
13447f37794eSStanislav Mekhanoshin   assert(BB && "Entry block not found!");
13457f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItNew = BB->begin();
13467f37794eSStanislav Mekhanoshin   return ItNew;
13477f37794eSStanislav Mekhanoshin }
13487f37794eSStanislav Mekhanoshin 
13497f37794eSStanislav Mekhanoshin // Insert a AllocsInst at the beginning of function entry block.
insertAlloca(CallInst * UI,IRBuilder<> & B,const char * prefix)13507f37794eSStanislav Mekhanoshin AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
13517f37794eSStanislav Mekhanoshin                                          const char *prefix) {
13527f37794eSStanislav Mekhanoshin   BasicBlock::iterator ItNew = getEntryIns(UI);
13537f37794eSStanislav Mekhanoshin   Function *UCallee = UI->getCalledFunction();
13547f37794eSStanislav Mekhanoshin   Type *RetType = UCallee->getReturnType();
13557f37794eSStanislav Mekhanoshin   B.SetInsertPoint(&*ItNew);
13565a667c0eSKazu Hirata   AllocaInst *Alloc =
13575a667c0eSKazu Hirata       B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
13584f04db4bSEli Friedman   Alloc->setAlignment(
13594f04db4bSEli Friedman       Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
13607f37794eSStanislav Mekhanoshin   return Alloc;
13617f37794eSStanislav Mekhanoshin }
13627f37794eSStanislav Mekhanoshin 
evaluateScalarMathFunc(const FuncInfo & FInfo,double & Res0,double & Res1,Constant * copr0,Constant * copr1,Constant * copr2)1363a750332dSSimon Pilgrim bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
13647f37794eSStanislav Mekhanoshin                                             double& Res0, double& Res1,
13657f37794eSStanislav Mekhanoshin                                             Constant *copr0, Constant *copr1,
13667f37794eSStanislav Mekhanoshin                                             Constant *copr2) {
13677f37794eSStanislav Mekhanoshin   // By default, opr0/opr1/opr3 holds values of float/double type.
13687f37794eSStanislav Mekhanoshin   // If they are not float/double, each function has to its
13697f37794eSStanislav Mekhanoshin   // operand separately.
13707f37794eSStanislav Mekhanoshin   double opr0=0.0, opr1=0.0, opr2=0.0;
13717f37794eSStanislav Mekhanoshin   ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
13727f37794eSStanislav Mekhanoshin   ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
13737f37794eSStanislav Mekhanoshin   ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
13747f37794eSStanislav Mekhanoshin   if (fpopr0) {
13757f37794eSStanislav Mekhanoshin     opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
13767f37794eSStanislav Mekhanoshin              ? fpopr0->getValueAPF().convertToDouble()
13777f37794eSStanislav Mekhanoshin              : (double)fpopr0->getValueAPF().convertToFloat();
13787f37794eSStanislav Mekhanoshin   }
13797f37794eSStanislav Mekhanoshin 
13807f37794eSStanislav Mekhanoshin   if (fpopr1) {
13817f37794eSStanislav Mekhanoshin     opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
13827f37794eSStanislav Mekhanoshin              ? fpopr1->getValueAPF().convertToDouble()
13837f37794eSStanislav Mekhanoshin              : (double)fpopr1->getValueAPF().convertToFloat();
13847f37794eSStanislav Mekhanoshin   }
13857f37794eSStanislav Mekhanoshin 
13867f37794eSStanislav Mekhanoshin   if (fpopr2) {
13877f37794eSStanislav Mekhanoshin     opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
13887f37794eSStanislav Mekhanoshin              ? fpopr2->getValueAPF().convertToDouble()
13897f37794eSStanislav Mekhanoshin              : (double)fpopr2->getValueAPF().convertToFloat();
13907f37794eSStanislav Mekhanoshin   }
13917f37794eSStanislav Mekhanoshin 
13927f37794eSStanislav Mekhanoshin   switch (FInfo.getId()) {
13937f37794eSStanislav Mekhanoshin   default : return false;
13947f37794eSStanislav Mekhanoshin 
13957f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOS:
13967f37794eSStanislav Mekhanoshin     Res0 = acos(opr0);
13977f37794eSStanislav Mekhanoshin     return true;
13987f37794eSStanislav Mekhanoshin 
13997f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSH:
14007f37794eSStanislav Mekhanoshin     // acosh(x) == log(x + sqrt(x*x - 1))
14017f37794eSStanislav Mekhanoshin     Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14027f37794eSStanislav Mekhanoshin     return true;
14037f37794eSStanislav Mekhanoshin 
14047f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ACOSPI:
14057f37794eSStanislav Mekhanoshin     Res0 = acos(opr0) / MATH_PI;
14067f37794eSStanislav Mekhanoshin     return true;
14077f37794eSStanislav Mekhanoshin 
14087f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASIN:
14097f37794eSStanislav Mekhanoshin     Res0 = asin(opr0);
14107f37794eSStanislav Mekhanoshin     return true;
14117f37794eSStanislav Mekhanoshin 
14127f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINH:
14137f37794eSStanislav Mekhanoshin     // asinh(x) == log(x + sqrt(x*x + 1))
14147f37794eSStanislav Mekhanoshin     Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
14157f37794eSStanislav Mekhanoshin     return true;
14167f37794eSStanislav Mekhanoshin 
14177f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ASINPI:
14187f37794eSStanislav Mekhanoshin     Res0 = asin(opr0) / MATH_PI;
14197f37794eSStanislav Mekhanoshin     return true;
14207f37794eSStanislav Mekhanoshin 
14217f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATAN:
14227f37794eSStanislav Mekhanoshin     Res0 = atan(opr0);
14237f37794eSStanislav Mekhanoshin     return true;
14247f37794eSStanislav Mekhanoshin 
14257f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANH:
14267f37794eSStanislav Mekhanoshin     // atanh(x) == (log(x+1) - log(x-1))/2;
14277f37794eSStanislav Mekhanoshin     Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
14287f37794eSStanislav Mekhanoshin     return true;
14297f37794eSStanislav Mekhanoshin 
14307f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ATANPI:
14317f37794eSStanislav Mekhanoshin     Res0 = atan(opr0) / MATH_PI;
14327f37794eSStanislav Mekhanoshin     return true;
14337f37794eSStanislav Mekhanoshin 
14347f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_CBRT:
14357f37794eSStanislav Mekhanoshin     Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
14367f37794eSStanislav Mekhanoshin     return true;
14377f37794eSStanislav Mekhanoshin 
14387f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COS:
14397f37794eSStanislav Mekhanoshin     Res0 = cos(opr0);
14407f37794eSStanislav Mekhanoshin     return true;
14417f37794eSStanislav Mekhanoshin 
14427f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSH:
14437f37794eSStanislav Mekhanoshin     Res0 = cosh(opr0);
14447f37794eSStanislav Mekhanoshin     return true;
14457f37794eSStanislav Mekhanoshin 
14467f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_COSPI:
14477f37794eSStanislav Mekhanoshin     Res0 = cos(MATH_PI * opr0);
14487f37794eSStanislav Mekhanoshin     return true;
14497f37794eSStanislav Mekhanoshin 
14507f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP:
14517f37794eSStanislav Mekhanoshin     Res0 = exp(opr0);
14527f37794eSStanislav Mekhanoshin     return true;
14537f37794eSStanislav Mekhanoshin 
14547f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP2:
14557f37794eSStanislav Mekhanoshin     Res0 = pow(2.0, opr0);
14567f37794eSStanislav Mekhanoshin     return true;
14577f37794eSStanislav Mekhanoshin 
14587f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXP10:
14597f37794eSStanislav Mekhanoshin     Res0 = pow(10.0, opr0);
14607f37794eSStanislav Mekhanoshin     return true;
14617f37794eSStanislav Mekhanoshin 
14627f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_EXPM1:
14637f37794eSStanislav Mekhanoshin     Res0 = exp(opr0) - 1.0;
14647f37794eSStanislav Mekhanoshin     return true;
14657f37794eSStanislav Mekhanoshin 
14667f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG:
14677f37794eSStanislav Mekhanoshin     Res0 = log(opr0);
14687f37794eSStanislav Mekhanoshin     return true;
14697f37794eSStanislav Mekhanoshin 
14707f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG2:
14717f37794eSStanislav Mekhanoshin     Res0 = log(opr0) / log(2.0);
14727f37794eSStanislav Mekhanoshin     return true;
14737f37794eSStanislav Mekhanoshin 
14747f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_LOG10:
14757f37794eSStanislav Mekhanoshin     Res0 = log(opr0) / log(10.0);
14767f37794eSStanislav Mekhanoshin     return true;
14777f37794eSStanislav Mekhanoshin 
14787f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RSQRT:
14797f37794eSStanislav Mekhanoshin     Res0 = 1.0 / sqrt(opr0);
14807f37794eSStanislav Mekhanoshin     return true;
14817f37794eSStanislav Mekhanoshin 
14827f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SIN:
14837f37794eSStanislav Mekhanoshin     Res0 = sin(opr0);
14847f37794eSStanislav Mekhanoshin     return true;
14857f37794eSStanislav Mekhanoshin 
14867f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINH:
14877f37794eSStanislav Mekhanoshin     Res0 = sinh(opr0);
14887f37794eSStanislav Mekhanoshin     return true;
14897f37794eSStanislav Mekhanoshin 
14907f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINPI:
14917f37794eSStanislav Mekhanoshin     Res0 = sin(MATH_PI * opr0);
14927f37794eSStanislav Mekhanoshin     return true;
14937f37794eSStanislav Mekhanoshin 
14947f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SQRT:
14957f37794eSStanislav Mekhanoshin     Res0 = sqrt(opr0);
14967f37794eSStanislav Mekhanoshin     return true;
14977f37794eSStanislav Mekhanoshin 
14987f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TAN:
14997f37794eSStanislav Mekhanoshin     Res0 = tan(opr0);
15007f37794eSStanislav Mekhanoshin     return true;
15017f37794eSStanislav Mekhanoshin 
15027f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANH:
15037f37794eSStanislav Mekhanoshin     Res0 = tanh(opr0);
15047f37794eSStanislav Mekhanoshin     return true;
15057f37794eSStanislav Mekhanoshin 
15067f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_TANPI:
15077f37794eSStanislav Mekhanoshin     Res0 = tan(MATH_PI * opr0);
15087f37794eSStanislav Mekhanoshin     return true;
15097f37794eSStanislav Mekhanoshin 
15107f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_RECIP:
15117f37794eSStanislav Mekhanoshin     Res0 = 1.0 / opr0;
15127f37794eSStanislav Mekhanoshin     return true;
15137f37794eSStanislav Mekhanoshin 
15147f37794eSStanislav Mekhanoshin   // two-arg functions
15157f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_DIVIDE:
15167f37794eSStanislav Mekhanoshin     Res0 = opr0 / opr1;
15177f37794eSStanislav Mekhanoshin     return true;
15187f37794eSStanislav Mekhanoshin 
15197f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POW:
15207f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWR:
15217f37794eSStanislav Mekhanoshin     Res0 = pow(opr0, opr1);
15227f37794eSStanislav Mekhanoshin     return true;
15237f37794eSStanislav Mekhanoshin 
15247f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_POWN: {
15257f37794eSStanislav Mekhanoshin     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15267f37794eSStanislav Mekhanoshin       double val = (double)iopr1->getSExtValue();
15277f37794eSStanislav Mekhanoshin       Res0 = pow(opr0, val);
15287f37794eSStanislav Mekhanoshin       return true;
15297f37794eSStanislav Mekhanoshin     }
15307f37794eSStanislav Mekhanoshin     return false;
15317f37794eSStanislav Mekhanoshin   }
15327f37794eSStanislav Mekhanoshin 
15337f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_ROOTN: {
15347f37794eSStanislav Mekhanoshin     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15357f37794eSStanislav Mekhanoshin       double val = (double)iopr1->getSExtValue();
15367f37794eSStanislav Mekhanoshin       Res0 = pow(opr0, 1.0 / val);
15377f37794eSStanislav Mekhanoshin       return true;
15387f37794eSStanislav Mekhanoshin     }
15397f37794eSStanislav Mekhanoshin     return false;
15407f37794eSStanislav Mekhanoshin   }
15417f37794eSStanislav Mekhanoshin 
15427f37794eSStanislav Mekhanoshin   // with ptr arg
15437f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_SINCOS:
15447f37794eSStanislav Mekhanoshin     Res0 = sin(opr0);
15457f37794eSStanislav Mekhanoshin     Res1 = cos(opr0);
15467f37794eSStanislav Mekhanoshin     return true;
15477f37794eSStanislav Mekhanoshin 
15487f37794eSStanislav Mekhanoshin   // three-arg functions
15497f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_FMA:
15507f37794eSStanislav Mekhanoshin   case AMDGPULibFunc::EI_MAD:
15517f37794eSStanislav Mekhanoshin     Res0 = opr0 * opr1 + opr2;
15527f37794eSStanislav Mekhanoshin     return true;
15537f37794eSStanislav Mekhanoshin   }
15547f37794eSStanislav Mekhanoshin 
15557f37794eSStanislav Mekhanoshin   return false;
15567f37794eSStanislav Mekhanoshin }
15577f37794eSStanislav Mekhanoshin 
evaluateCall(CallInst * aCI,const FuncInfo & FInfo)1558a750332dSSimon Pilgrim bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1559c1e32b3fSKazu Hirata   int numArgs = (int)aCI->arg_size();
15607f37794eSStanislav Mekhanoshin   if (numArgs > 3)
15617f37794eSStanislav Mekhanoshin     return false;
15627f37794eSStanislav Mekhanoshin 
15637f37794eSStanislav Mekhanoshin   Constant *copr0 = nullptr;
15647f37794eSStanislav Mekhanoshin   Constant *copr1 = nullptr;
15657f37794eSStanislav Mekhanoshin   Constant *copr2 = nullptr;
15667f37794eSStanislav Mekhanoshin   if (numArgs > 0) {
15677f37794eSStanislav Mekhanoshin     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
15687f37794eSStanislav Mekhanoshin       return false;
15697f37794eSStanislav Mekhanoshin   }
15707f37794eSStanislav Mekhanoshin 
15717f37794eSStanislav Mekhanoshin   if (numArgs > 1) {
15727f37794eSStanislav Mekhanoshin     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
15737f37794eSStanislav Mekhanoshin       if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
15747f37794eSStanislav Mekhanoshin         return false;
15757f37794eSStanislav Mekhanoshin     }
15767f37794eSStanislav Mekhanoshin   }
15777f37794eSStanislav Mekhanoshin 
15787f37794eSStanislav Mekhanoshin   if (numArgs > 2) {
15797f37794eSStanislav Mekhanoshin     if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
15807f37794eSStanislav Mekhanoshin       return false;
15817f37794eSStanislav Mekhanoshin   }
15827f37794eSStanislav Mekhanoshin 
15837f37794eSStanislav Mekhanoshin   // At this point, all arguments to aCI are constants.
15847f37794eSStanislav Mekhanoshin 
15857f37794eSStanislav Mekhanoshin   // max vector size is 16, and sincos will generate two results.
15867f37794eSStanislav Mekhanoshin   double DVal0[16], DVal1[16];
15878de72973SSimon Pilgrim   int FuncVecSize = getVecSize(FInfo);
15887f37794eSStanislav Mekhanoshin   bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
15898de72973SSimon Pilgrim   if (FuncVecSize == 1) {
15907f37794eSStanislav Mekhanoshin     if (!evaluateScalarMathFunc(FInfo, DVal0[0],
15917f37794eSStanislav Mekhanoshin                                 DVal1[0], copr0, copr1, copr2)) {
15927f37794eSStanislav Mekhanoshin       return false;
15937f37794eSStanislav Mekhanoshin     }
15947f37794eSStanislav Mekhanoshin   } else {
15957f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
15967f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
15977f37794eSStanislav Mekhanoshin     ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
15988de72973SSimon Pilgrim     for (int i = 0; i < FuncVecSize; ++i) {
15997f37794eSStanislav Mekhanoshin       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16007f37794eSStanislav Mekhanoshin       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16017f37794eSStanislav Mekhanoshin       Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
16027f37794eSStanislav Mekhanoshin       if (!evaluateScalarMathFunc(FInfo, DVal0[i],
16037f37794eSStanislav Mekhanoshin                                   DVal1[i], celt0, celt1, celt2)) {
16047f37794eSStanislav Mekhanoshin         return false;
16057f37794eSStanislav Mekhanoshin       }
16067f37794eSStanislav Mekhanoshin     }
16077f37794eSStanislav Mekhanoshin   }
16087f37794eSStanislav Mekhanoshin 
16097f37794eSStanislav Mekhanoshin   LLVMContext &context = CI->getParent()->getParent()->getContext();
16107f37794eSStanislav Mekhanoshin   Constant *nval0, *nval1;
16118de72973SSimon Pilgrim   if (FuncVecSize == 1) {
16127f37794eSStanislav Mekhanoshin     nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
16137f37794eSStanislav Mekhanoshin     if (hasTwoResults)
16147f37794eSStanislav Mekhanoshin       nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
16157f37794eSStanislav Mekhanoshin   } else {
16167f37794eSStanislav Mekhanoshin     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16177f37794eSStanislav Mekhanoshin       SmallVector <float, 0> FVal0, FVal1;
16188de72973SSimon Pilgrim       for (int i = 0; i < FuncVecSize; ++i)
16197f37794eSStanislav Mekhanoshin         FVal0.push_back((float)DVal0[i]);
16207f37794eSStanislav Mekhanoshin       ArrayRef<float> tmp0(FVal0);
16217f37794eSStanislav Mekhanoshin       nval0 = ConstantDataVector::get(context, tmp0);
16227f37794eSStanislav Mekhanoshin       if (hasTwoResults) {
16238de72973SSimon Pilgrim         for (int i = 0; i < FuncVecSize; ++i)
16247f37794eSStanislav Mekhanoshin           FVal1.push_back((float)DVal1[i]);
16257f37794eSStanislav Mekhanoshin         ArrayRef<float> tmp1(FVal1);
16267f37794eSStanislav Mekhanoshin         nval1 = ConstantDataVector::get(context, tmp1);
16277f37794eSStanislav Mekhanoshin       }
16287f37794eSStanislav Mekhanoshin     } else {
16297f37794eSStanislav Mekhanoshin       ArrayRef<double> tmp0(DVal0);
16307f37794eSStanislav Mekhanoshin       nval0 = ConstantDataVector::get(context, tmp0);
16317f37794eSStanislav Mekhanoshin       if (hasTwoResults) {
16327f37794eSStanislav Mekhanoshin         ArrayRef<double> tmp1(DVal1);
16337f37794eSStanislav Mekhanoshin         nval1 = ConstantDataVector::get(context, tmp1);
16347f37794eSStanislav Mekhanoshin       }
16357f37794eSStanislav Mekhanoshin     }
16367f37794eSStanislav Mekhanoshin   }
16377f37794eSStanislav Mekhanoshin 
16387f37794eSStanislav Mekhanoshin   if (hasTwoResults) {
16397f37794eSStanislav Mekhanoshin     // sincos
16407f37794eSStanislav Mekhanoshin     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16417f37794eSStanislav Mekhanoshin            "math function with ptr arg not supported yet");
16427f37794eSStanislav Mekhanoshin     new StoreInst(nval1, aCI->getArgOperand(1), aCI);
16437f37794eSStanislav Mekhanoshin   }
16447f37794eSStanislav Mekhanoshin 
16457f37794eSStanislav Mekhanoshin   replaceCall(nval0);
16467f37794eSStanislav Mekhanoshin   return true;
16477f37794eSStanislav Mekhanoshin }
16487f37794eSStanislav Mekhanoshin 
16497f37794eSStanislav Mekhanoshin // Public interface to the Simplify LibCalls pass.
createAMDGPUSimplifyLibCallsPass(const TargetMachine * TM)1650348735b7SMatt Arsenault FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1651348735b7SMatt Arsenault   return new AMDGPUSimplifyLibCalls(TM);
16527f37794eSStanislav Mekhanoshin }
16537f37794eSStanislav Mekhanoshin 
createAMDGPUUseNativeCallsPass()16547f37794eSStanislav Mekhanoshin FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
16557f37794eSStanislav Mekhanoshin   return new AMDGPUUseNativeCalls();
16567f37794eSStanislav Mekhanoshin }
16577f37794eSStanislav Mekhanoshin 
runOnFunction(Function & F)16587f37794eSStanislav Mekhanoshin bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
16597f37794eSStanislav Mekhanoshin   if (skipFunction(F))
16607f37794eSStanislav Mekhanoshin     return false;
16617f37794eSStanislav Mekhanoshin 
16627f37794eSStanislav Mekhanoshin   bool Changed = false;
16637f37794eSStanislav Mekhanoshin   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
16647f37794eSStanislav Mekhanoshin 
1665d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1666d34e60caSNicola Zaghen              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
16677f37794eSStanislav Mekhanoshin 
16687f37794eSStanislav Mekhanoshin   for (auto &BB : F) {
16697f37794eSStanislav Mekhanoshin     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
16707f37794eSStanislav Mekhanoshin       // Ignore non-calls.
16717f37794eSStanislav Mekhanoshin       CallInst *CI = dyn_cast<CallInst>(I);
16727f37794eSStanislav Mekhanoshin       ++I;
16736a31a9a5Sdfukalov       // Ignore intrinsics that do not become real instructions.
16746a31a9a5Sdfukalov       if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
16756a31a9a5Sdfukalov         continue;
16767f37794eSStanislav Mekhanoshin 
16777f37794eSStanislav Mekhanoshin       // Ignore indirect calls.
16787f37794eSStanislav Mekhanoshin       Function *Callee = CI->getCalledFunction();
16795a667c0eSKazu Hirata       if (Callee == nullptr)
16805a667c0eSKazu Hirata         continue;
16817f37794eSStanislav Mekhanoshin 
1682d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
16837f37794eSStanislav Mekhanoshin                  dbgs().flush());
16847f37794eSStanislav Mekhanoshin       if(Simplifier.fold(CI, AA))
16857f37794eSStanislav Mekhanoshin         Changed = true;
16867f37794eSStanislav Mekhanoshin     }
16877f37794eSStanislav Mekhanoshin   }
16887f37794eSStanislav Mekhanoshin   return Changed;
16897f37794eSStanislav Mekhanoshin }
16907f37794eSStanislav Mekhanoshin 
run(Function & F,FunctionAnalysisManager & AM)16919abc4577SArthur Eubanks PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
16929abc4577SArthur Eubanks                                                   FunctionAnalysisManager &AM) {
16938e293fe6SArthur Eubanks   AMDGPULibCalls Simplifier(&TM);
16949abc4577SArthur Eubanks   Simplifier.initNativeFuncs();
16959abc4577SArthur Eubanks 
16969abc4577SArthur Eubanks   bool Changed = false;
16979abc4577SArthur Eubanks   auto AA = &AM.getResult<AAManager>(F);
16989abc4577SArthur Eubanks 
16999abc4577SArthur Eubanks   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
17009abc4577SArthur Eubanks              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
17019abc4577SArthur Eubanks 
17029abc4577SArthur Eubanks   for (auto &BB : F) {
17039abc4577SArthur Eubanks     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
17049abc4577SArthur Eubanks       // Ignore non-calls.
17059abc4577SArthur Eubanks       CallInst *CI = dyn_cast<CallInst>(I);
17069abc4577SArthur Eubanks       ++I;
17079abc4577SArthur Eubanks       // Ignore intrinsics that do not become real instructions.
17089abc4577SArthur Eubanks       if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
17099abc4577SArthur Eubanks         continue;
17109abc4577SArthur Eubanks 
17119abc4577SArthur Eubanks       // Ignore indirect calls.
17129abc4577SArthur Eubanks       Function *Callee = CI->getCalledFunction();
17135a667c0eSKazu Hirata       if (Callee == nullptr)
17149abc4577SArthur Eubanks         continue;
17159abc4577SArthur Eubanks 
17169abc4577SArthur Eubanks       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
17179abc4577SArthur Eubanks                  dbgs().flush());
17189abc4577SArthur Eubanks       if (Simplifier.fold(CI, AA))
17199abc4577SArthur Eubanks         Changed = true;
17209abc4577SArthur Eubanks     }
17219abc4577SArthur Eubanks   }
17229abc4577SArthur Eubanks   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
17239abc4577SArthur Eubanks }
17249abc4577SArthur Eubanks 
runOnFunction(Function & F)17257f37794eSStanislav Mekhanoshin bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
17267f37794eSStanislav Mekhanoshin   if (skipFunction(F) || UseNative.empty())
17277f37794eSStanislav Mekhanoshin     return false;
17287f37794eSStanislav Mekhanoshin 
17297f37794eSStanislav Mekhanoshin   bool Changed = false;
17307f37794eSStanislav Mekhanoshin   for (auto &BB : F) {
17317f37794eSStanislav Mekhanoshin     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17327f37794eSStanislav Mekhanoshin       // Ignore non-calls.
17337f37794eSStanislav Mekhanoshin       CallInst *CI = dyn_cast<CallInst>(I);
17347f37794eSStanislav Mekhanoshin       ++I;
17357f37794eSStanislav Mekhanoshin       if (!CI) continue;
17367f37794eSStanislav Mekhanoshin 
17377f37794eSStanislav Mekhanoshin       // Ignore indirect calls.
17387f37794eSStanislav Mekhanoshin       Function *Callee = CI->getCalledFunction();
17395a667c0eSKazu Hirata       if (Callee == nullptr)
17405a667c0eSKazu Hirata         continue;
17417f37794eSStanislav Mekhanoshin 
17427f37794eSStanislav Mekhanoshin       if (Simplifier.useNative(CI))
17437f37794eSStanislav Mekhanoshin         Changed = true;
17447f37794eSStanislav Mekhanoshin     }
17457f37794eSStanislav Mekhanoshin   }
17467f37794eSStanislav Mekhanoshin   return Changed;
17477f37794eSStanislav Mekhanoshin }
17489abc4577SArthur Eubanks 
run(Function & F,FunctionAnalysisManager & AM)17499abc4577SArthur Eubanks PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
17509abc4577SArthur Eubanks                                                 FunctionAnalysisManager &AM) {
17519abc4577SArthur Eubanks   if (UseNative.empty())
17529abc4577SArthur Eubanks     return PreservedAnalyses::all();
17539abc4577SArthur Eubanks 
17549abc4577SArthur Eubanks   AMDGPULibCalls Simplifier;
17559abc4577SArthur Eubanks   Simplifier.initNativeFuncs();
17569abc4577SArthur Eubanks 
17579abc4577SArthur Eubanks   bool Changed = false;
17589abc4577SArthur Eubanks   for (auto &BB : F) {
17599abc4577SArthur Eubanks     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
17609abc4577SArthur Eubanks       // Ignore non-calls.
17619abc4577SArthur Eubanks       CallInst *CI = dyn_cast<CallInst>(I);
17629abc4577SArthur Eubanks       ++I;
17639abc4577SArthur Eubanks       if (!CI)
17649abc4577SArthur Eubanks         continue;
17659abc4577SArthur Eubanks 
17669abc4577SArthur Eubanks       // Ignore indirect calls.
17679abc4577SArthur Eubanks       Function *Callee = CI->getCalledFunction();
17685a667c0eSKazu Hirata       if (Callee == nullptr)
17699abc4577SArthur Eubanks         continue;
17709abc4577SArthur Eubanks 
17719abc4577SArthur Eubanks       if (Simplifier.useNative(CI))
17729abc4577SArthur Eubanks         Changed = true;
17739abc4577SArthur Eubanks     }
17749abc4577SArthur Eubanks   }
17759abc4577SArthur Eubanks   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
17769abc4577SArthur Eubanks }
1777