17f37794eSStanislav Mekhanoshin //===- AMDGPULibCalls.cpp -------------------------------------------------===// 27f37794eSStanislav Mekhanoshin // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67f37794eSStanislav Mekhanoshin // 77f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===// 87f37794eSStanislav Mekhanoshin // 97f37794eSStanislav Mekhanoshin /// \file 105f8f34e4SAdrian Prantl /// This file does AMD library function optimizations. 117f37794eSStanislav Mekhanoshin // 127f37794eSStanislav Mekhanoshin //===----------------------------------------------------------------------===// 137f37794eSStanislav Mekhanoshin 147f37794eSStanislav Mekhanoshin #include "AMDGPU.h" 157f37794eSStanislav Mekhanoshin #include "AMDGPULibFunc.h" 16560d7e04Sdfukalov #include "GCNSubtarget.h" 177f37794eSStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h" 187f37794eSStanislav Mekhanoshin #include "llvm/Analysis/Loads.h" 196a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h" 2099142003SNikita Popov #include "llvm/IR/IRBuilder.h" 2105da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 22a9191c84SStanislav Mekhanoshin #include "llvm/Target/TargetMachine.h" 2305da2fe5SReid Kleckner 2405da2fe5SReid Kleckner #define DEBUG_TYPE "amdgpu-simplifylib" 257f37794eSStanislav Mekhanoshin 267f37794eSStanislav Mekhanoshin using namespace llvm; 277f37794eSStanislav Mekhanoshin 287f37794eSStanislav Mekhanoshin static cl::opt<bool> EnablePreLink("amdgpu-prelink", 297f37794eSStanislav Mekhanoshin cl::desc("Enable pre-link mode optimizations"), 307f37794eSStanislav Mekhanoshin cl::init(false), 317f37794eSStanislav Mekhanoshin cl::Hidden); 327f37794eSStanislav Mekhanoshin 337f37794eSStanislav Mekhanoshin static cl::list<std::string> UseNative("amdgpu-use-native", 347f37794eSStanislav Mekhanoshin cl::desc("Comma separated list of functions to replace with native, or all"), 357f37794eSStanislav Mekhanoshin cl::CommaSeparated, cl::ValueOptional, 367f37794eSStanislav Mekhanoshin cl::Hidden); 377f37794eSStanislav Mekhanoshin 38c57a9dc4SEvandro Menezes #define MATH_PI numbers::pi 39c57a9dc4SEvandro Menezes #define MATH_E numbers::e 40c57a9dc4SEvandro Menezes #define MATH_SQRT2 numbers::sqrt2 41c57a9dc4SEvandro Menezes #define MATH_SQRT1_2 numbers::inv_sqrt2 427f37794eSStanislav Mekhanoshin 437f37794eSStanislav Mekhanoshin namespace llvm { 447f37794eSStanislav Mekhanoshin 457f37794eSStanislav Mekhanoshin class AMDGPULibCalls { 467f37794eSStanislav Mekhanoshin private: 477f37794eSStanislav Mekhanoshin 487f37794eSStanislav Mekhanoshin typedef llvm::AMDGPULibFunc FuncInfo; 497f37794eSStanislav Mekhanoshin 50a9191c84SStanislav Mekhanoshin const TargetMachine *TM; 51a9191c84SStanislav Mekhanoshin 527f37794eSStanislav Mekhanoshin // -fuse-native. 537f37794eSStanislav Mekhanoshin bool AllNative = false; 547f37794eSStanislav Mekhanoshin 557f37794eSStanislav Mekhanoshin bool useNativeFunc(const StringRef F) const; 567f37794eSStanislav Mekhanoshin 57*dc6e8dfdSJacob Lambert // Return a pointer (pointer expr) to the function if function definition with 587f37794eSStanislav Mekhanoshin // "FuncName" exists. It may create a new function prototype in pre-link mode. 5913680223SJames Y Knight FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); 607f37794eSStanislav Mekhanoshin 617f37794eSStanislav Mekhanoshin // Replace a normal function with its native version. 627f37794eSStanislav Mekhanoshin bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo); 637f37794eSStanislav Mekhanoshin 647f37794eSStanislav Mekhanoshin bool parseFunctionName(const StringRef& FMangledName, 657f37794eSStanislav Mekhanoshin FuncInfo *FInfo=nullptr /*out*/); 667f37794eSStanislav Mekhanoshin 677f37794eSStanislav Mekhanoshin bool TDOFold(CallInst *CI, const FuncInfo &FInfo); 687f37794eSStanislav Mekhanoshin 697f37794eSStanislav Mekhanoshin /* Specialized optimizations */ 707f37794eSStanislav Mekhanoshin 717f37794eSStanislav Mekhanoshin // recip (half or native) 727f37794eSStanislav Mekhanoshin bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 737f37794eSStanislav Mekhanoshin 747f37794eSStanislav Mekhanoshin // divide (half or native) 757f37794eSStanislav Mekhanoshin bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 767f37794eSStanislav Mekhanoshin 777f37794eSStanislav Mekhanoshin // pow/powr/pown 787f37794eSStanislav Mekhanoshin bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 797f37794eSStanislav Mekhanoshin 807f37794eSStanislav Mekhanoshin // rootn 817f37794eSStanislav Mekhanoshin bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 827f37794eSStanislav Mekhanoshin 837f37794eSStanislav Mekhanoshin // fma/mad 847f37794eSStanislav Mekhanoshin bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 857f37794eSStanislav Mekhanoshin 867f37794eSStanislav Mekhanoshin // -fuse-native for sincos 877f37794eSStanislav Mekhanoshin bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); 887f37794eSStanislav Mekhanoshin 897f37794eSStanislav Mekhanoshin // evaluate calls if calls' arguments are constants. 907f37794eSStanislav Mekhanoshin bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0, 917f37794eSStanislav Mekhanoshin double& Res1, Constant *copr0, Constant *copr1, Constant *copr2); 927f37794eSStanislav Mekhanoshin bool evaluateCall(CallInst *aCI, FuncInfo &FInfo); 937f37794eSStanislav Mekhanoshin 947f37794eSStanislav Mekhanoshin // exp 957f37794eSStanislav Mekhanoshin bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 967f37794eSStanislav Mekhanoshin 977f37794eSStanislav Mekhanoshin // exp2 987f37794eSStanislav Mekhanoshin bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 997f37794eSStanislav Mekhanoshin 1007f37794eSStanislav Mekhanoshin // exp10 1017f37794eSStanislav Mekhanoshin bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 1027f37794eSStanislav Mekhanoshin 1037f37794eSStanislav Mekhanoshin // log 1047f37794eSStanislav Mekhanoshin bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 1057f37794eSStanislav Mekhanoshin 1067f37794eSStanislav Mekhanoshin // log2 1077f37794eSStanislav Mekhanoshin bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 1087f37794eSStanislav Mekhanoshin 1097f37794eSStanislav Mekhanoshin // log10 1107f37794eSStanislav Mekhanoshin bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 1117f37794eSStanislav Mekhanoshin 1127f37794eSStanislav Mekhanoshin // sqrt 1137f37794eSStanislav Mekhanoshin bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); 1147f37794eSStanislav Mekhanoshin 1157f37794eSStanislav Mekhanoshin // sin/cos 1167f37794eSStanislav Mekhanoshin bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA); 1177f37794eSStanislav Mekhanoshin 118fc5121a7SYaxun Liu // __read_pipe/__write_pipe 119fc5121a7SYaxun Liu bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo); 120fc5121a7SYaxun Liu 121a9191c84SStanislav Mekhanoshin // llvm.amdgcn.wavefrontsize 122a9191c84SStanislav Mekhanoshin bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B); 123a9191c84SStanislav Mekhanoshin 1247f37794eSStanislav Mekhanoshin // Get insertion point at entry. 1257f37794eSStanislav Mekhanoshin BasicBlock::iterator getEntryIns(CallInst * UI); 1267f37794eSStanislav Mekhanoshin // Insert an Alloc instruction. 1277f37794eSStanislav Mekhanoshin AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); 1287f37794eSStanislav Mekhanoshin // Get a scalar native builtin signle argument FP function 12913680223SJames Y Knight FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); 1307f37794eSStanislav Mekhanoshin 1317f37794eSStanislav Mekhanoshin protected: 1327f37794eSStanislav Mekhanoshin CallInst *CI; 1337f37794eSStanislav Mekhanoshin 1347f37794eSStanislav Mekhanoshin bool isUnsafeMath(const CallInst *CI) const; 1357f37794eSStanislav Mekhanoshin 1367f37794eSStanislav Mekhanoshin void replaceCall(Value *With) { 1377f37794eSStanislav Mekhanoshin CI->replaceAllUsesWith(With); 1387f37794eSStanislav Mekhanoshin CI->eraseFromParent(); 1397f37794eSStanislav Mekhanoshin } 1407f37794eSStanislav Mekhanoshin 1417f37794eSStanislav Mekhanoshin public: 142a9191c84SStanislav Mekhanoshin AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {} 143a9191c84SStanislav Mekhanoshin 1447f37794eSStanislav Mekhanoshin bool fold(CallInst *CI, AliasAnalysis *AA = nullptr); 1457f37794eSStanislav Mekhanoshin 1467f37794eSStanislav Mekhanoshin void initNativeFuncs(); 1477f37794eSStanislav Mekhanoshin 1487f37794eSStanislav Mekhanoshin // Replace a normal math function call with that native version 1497f37794eSStanislav Mekhanoshin bool useNative(CallInst *CI); 1507f37794eSStanislav Mekhanoshin }; 1517f37794eSStanislav Mekhanoshin 1527f37794eSStanislav Mekhanoshin } // end llvm namespace 1537f37794eSStanislav Mekhanoshin 1547f37794eSStanislav Mekhanoshin namespace { 1557f37794eSStanislav Mekhanoshin 1567f37794eSStanislav Mekhanoshin class AMDGPUSimplifyLibCalls : public FunctionPass { 1577f37794eSStanislav Mekhanoshin 158a9191c84SStanislav Mekhanoshin AMDGPULibCalls Simplifier; 159a9191c84SStanislav Mekhanoshin 1607f37794eSStanislav Mekhanoshin public: 1617f37794eSStanislav Mekhanoshin static char ID; // Pass identification 1627f37794eSStanislav Mekhanoshin 163348735b7SMatt Arsenault AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr) 164348735b7SMatt Arsenault : FunctionPass(ID), Simplifier(TM) { 1657f37794eSStanislav Mekhanoshin initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); 1667f37794eSStanislav Mekhanoshin } 1677f37794eSStanislav Mekhanoshin 1687f37794eSStanislav Mekhanoshin void getAnalysisUsage(AnalysisUsage &AU) const override { 1697f37794eSStanislav Mekhanoshin AU.addRequired<AAResultsWrapperPass>(); 1707f37794eSStanislav Mekhanoshin } 1717f37794eSStanislav Mekhanoshin 1727f37794eSStanislav Mekhanoshin bool runOnFunction(Function &M) override; 1737f37794eSStanislav Mekhanoshin }; 1747f37794eSStanislav Mekhanoshin 1757f37794eSStanislav Mekhanoshin class AMDGPUUseNativeCalls : public FunctionPass { 1767f37794eSStanislav Mekhanoshin 1777f37794eSStanislav Mekhanoshin AMDGPULibCalls Simplifier; 1787f37794eSStanislav Mekhanoshin 1797f37794eSStanislav Mekhanoshin public: 1807f37794eSStanislav Mekhanoshin static char ID; // Pass identification 1817f37794eSStanislav Mekhanoshin 1827f37794eSStanislav Mekhanoshin AMDGPUUseNativeCalls() : FunctionPass(ID) { 1837f37794eSStanislav Mekhanoshin initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry()); 1847f37794eSStanislav Mekhanoshin Simplifier.initNativeFuncs(); 1857f37794eSStanislav Mekhanoshin } 1867f37794eSStanislav Mekhanoshin 1877f37794eSStanislav Mekhanoshin bool runOnFunction(Function &F) override; 1887f37794eSStanislav Mekhanoshin }; 1897f37794eSStanislav Mekhanoshin 1907f37794eSStanislav Mekhanoshin } // end anonymous namespace. 1917f37794eSStanislav Mekhanoshin 1927f37794eSStanislav Mekhanoshin char AMDGPUSimplifyLibCalls::ID = 0; 1937f37794eSStanislav Mekhanoshin char AMDGPUUseNativeCalls::ID = 0; 1947f37794eSStanislav Mekhanoshin 1957f37794eSStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", 1967f37794eSStanislav Mekhanoshin "Simplify well-known AMD library calls", false, false) 1977f37794eSStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1987f37794eSStanislav Mekhanoshin INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", 1997f37794eSStanislav Mekhanoshin "Simplify well-known AMD library calls", false, false) 2007f37794eSStanislav Mekhanoshin 2017f37794eSStanislav Mekhanoshin INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative", 2027f37794eSStanislav Mekhanoshin "Replace builtin math calls with that native versions.", 2037f37794eSStanislav Mekhanoshin false, false) 2047f37794eSStanislav Mekhanoshin 2057f37794eSStanislav Mekhanoshin template <typename IRB> 20613680223SJames Y Knight static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, 20751ebcaafSBenjamin Kramer const Twine &Name = "") { 2087f37794eSStanislav Mekhanoshin CallInst *R = B.CreateCall(Callee, Arg, Name); 20913680223SJames Y Knight if (Function *F = dyn_cast<Function>(Callee.getCallee())) 2107f37794eSStanislav Mekhanoshin R->setCallingConv(F->getCallingConv()); 2117f37794eSStanislav Mekhanoshin return R; 2127f37794eSStanislav Mekhanoshin } 2137f37794eSStanislav Mekhanoshin 2147f37794eSStanislav Mekhanoshin template <typename IRB> 21513680223SJames Y Knight static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, 21613680223SJames Y Knight Value *Arg2, const Twine &Name = "") { 2177f37794eSStanislav Mekhanoshin CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); 21813680223SJames Y Knight if (Function *F = dyn_cast<Function>(Callee.getCallee())) 2197f37794eSStanislav Mekhanoshin R->setCallingConv(F->getCallingConv()); 2207f37794eSStanislav Mekhanoshin return R; 2217f37794eSStanislav Mekhanoshin } 2227f37794eSStanislav Mekhanoshin 2237f37794eSStanislav Mekhanoshin // Data structures for table-driven optimizations. 2247f37794eSStanislav Mekhanoshin // FuncTbl works for both f32 and f64 functions with 1 input argument 2257f37794eSStanislav Mekhanoshin 2267f37794eSStanislav Mekhanoshin struct TableEntry { 2277f37794eSStanislav Mekhanoshin double result; 2287f37794eSStanislav Mekhanoshin double input; 2297f37794eSStanislav Mekhanoshin }; 2307f37794eSStanislav Mekhanoshin 2317f37794eSStanislav Mekhanoshin /* a list of {result, input} */ 2327f37794eSStanislav Mekhanoshin static const TableEntry tbl_acos[] = { 2337f37794eSStanislav Mekhanoshin {MATH_PI / 2.0, 0.0}, 2347f37794eSStanislav Mekhanoshin {MATH_PI / 2.0, -0.0}, 2357f37794eSStanislav Mekhanoshin {0.0, 1.0}, 2367f37794eSStanislav Mekhanoshin {MATH_PI, -1.0} 2377f37794eSStanislav Mekhanoshin }; 2387f37794eSStanislav Mekhanoshin static const TableEntry tbl_acosh[] = { 2397f37794eSStanislav Mekhanoshin {0.0, 1.0} 2407f37794eSStanislav Mekhanoshin }; 2417f37794eSStanislav Mekhanoshin static const TableEntry tbl_acospi[] = { 2427f37794eSStanislav Mekhanoshin {0.5, 0.0}, 2437f37794eSStanislav Mekhanoshin {0.5, -0.0}, 2447f37794eSStanislav Mekhanoshin {0.0, 1.0}, 2457f37794eSStanislav Mekhanoshin {1.0, -1.0} 2467f37794eSStanislav Mekhanoshin }; 2477f37794eSStanislav Mekhanoshin static const TableEntry tbl_asin[] = { 2487f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2497f37794eSStanislav Mekhanoshin {-0.0, -0.0}, 2507f37794eSStanislav Mekhanoshin {MATH_PI / 2.0, 1.0}, 2517f37794eSStanislav Mekhanoshin {-MATH_PI / 2.0, -1.0} 2527f37794eSStanislav Mekhanoshin }; 2537f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinh[] = { 2547f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2557f37794eSStanislav Mekhanoshin {-0.0, -0.0} 2567f37794eSStanislav Mekhanoshin }; 2577f37794eSStanislav Mekhanoshin static const TableEntry tbl_asinpi[] = { 2587f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2597f37794eSStanislav Mekhanoshin {-0.0, -0.0}, 2607f37794eSStanislav Mekhanoshin {0.5, 1.0}, 2617f37794eSStanislav Mekhanoshin {-0.5, -1.0} 2627f37794eSStanislav Mekhanoshin }; 2637f37794eSStanislav Mekhanoshin static const TableEntry tbl_atan[] = { 2647f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2657f37794eSStanislav Mekhanoshin {-0.0, -0.0}, 2667f37794eSStanislav Mekhanoshin {MATH_PI / 4.0, 1.0}, 2677f37794eSStanislav Mekhanoshin {-MATH_PI / 4.0, -1.0} 2687f37794eSStanislav Mekhanoshin }; 2697f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanh[] = { 2707f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2717f37794eSStanislav Mekhanoshin {-0.0, -0.0} 2727f37794eSStanislav Mekhanoshin }; 2737f37794eSStanislav Mekhanoshin static const TableEntry tbl_atanpi[] = { 2747f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2757f37794eSStanislav Mekhanoshin {-0.0, -0.0}, 2767f37794eSStanislav Mekhanoshin {0.25, 1.0}, 2777f37794eSStanislav Mekhanoshin {-0.25, -1.0} 2787f37794eSStanislav Mekhanoshin }; 2797f37794eSStanislav Mekhanoshin static const TableEntry tbl_cbrt[] = { 2807f37794eSStanislav Mekhanoshin {0.0, 0.0}, 2817f37794eSStanislav Mekhanoshin {-0.0, -0.0}, 2827f37794eSStanislav Mekhanoshin {1.0, 1.0}, 2837f37794eSStanislav Mekhanoshin {-1.0, -1.0}, 2847f37794eSStanislav Mekhanoshin }; 2857f37794eSStanislav Mekhanoshin static const TableEntry tbl_cos[] = { 2867f37794eSStanislav Mekhanoshin {1.0, 0.0}, 2877f37794eSStanislav Mekhanoshin {1.0, -0.0} 2887f37794eSStanislav Mekhanoshin }; 2897f37794eSStanislav Mekhanoshin static const TableEntry tbl_cosh[] = { 2907f37794eSStanislav Mekhanoshin {1.0, 0.0}, 2917f37794eSStanislav Mekhanoshin {1.0, -0.0} 2927f37794eSStanislav Mekhanoshin }; 2937f37794eSStanislav Mekhanoshin static const TableEntry tbl_cospi[] = { 2947f37794eSStanislav Mekhanoshin {1.0, 0.0}, 2957f37794eSStanislav Mekhanoshin {1.0, -0.0} 2967f37794eSStanislav Mekhanoshin }; 2977f37794eSStanislav Mekhanoshin static const TableEntry tbl_erfc[] = { 2987f37794eSStanislav Mekhanoshin {1.0, 0.0}, 2997f37794eSStanislav Mekhanoshin {1.0, -0.0} 3007f37794eSStanislav Mekhanoshin }; 3017f37794eSStanislav Mekhanoshin static const TableEntry tbl_erf[] = { 3027f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3037f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3047f37794eSStanislav Mekhanoshin }; 3057f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp[] = { 3067f37794eSStanislav Mekhanoshin {1.0, 0.0}, 3077f37794eSStanislav Mekhanoshin {1.0, -0.0}, 3087f37794eSStanislav Mekhanoshin {MATH_E, 1.0} 3097f37794eSStanislav Mekhanoshin }; 3107f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp2[] = { 3117f37794eSStanislav Mekhanoshin {1.0, 0.0}, 3127f37794eSStanislav Mekhanoshin {1.0, -0.0}, 3137f37794eSStanislav Mekhanoshin {2.0, 1.0} 3147f37794eSStanislav Mekhanoshin }; 3157f37794eSStanislav Mekhanoshin static const TableEntry tbl_exp10[] = { 3167f37794eSStanislav Mekhanoshin {1.0, 0.0}, 3177f37794eSStanislav Mekhanoshin {1.0, -0.0}, 3187f37794eSStanislav Mekhanoshin {10.0, 1.0} 3197f37794eSStanislav Mekhanoshin }; 3207f37794eSStanislav Mekhanoshin static const TableEntry tbl_expm1[] = { 3217f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3227f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3237f37794eSStanislav Mekhanoshin }; 3247f37794eSStanislav Mekhanoshin static const TableEntry tbl_log[] = { 3257f37794eSStanislav Mekhanoshin {0.0, 1.0}, 3267f37794eSStanislav Mekhanoshin {1.0, MATH_E} 3277f37794eSStanislav Mekhanoshin }; 3287f37794eSStanislav Mekhanoshin static const TableEntry tbl_log2[] = { 3297f37794eSStanislav Mekhanoshin {0.0, 1.0}, 3307f37794eSStanislav Mekhanoshin {1.0, 2.0} 3317f37794eSStanislav Mekhanoshin }; 3327f37794eSStanislav Mekhanoshin static const TableEntry tbl_log10[] = { 3337f37794eSStanislav Mekhanoshin {0.0, 1.0}, 3347f37794eSStanislav Mekhanoshin {1.0, 10.0} 3357f37794eSStanislav Mekhanoshin }; 3367f37794eSStanislav Mekhanoshin static const TableEntry tbl_rsqrt[] = { 3377f37794eSStanislav Mekhanoshin {1.0, 1.0}, 338c57a9dc4SEvandro Menezes {MATH_SQRT1_2, 2.0} 3397f37794eSStanislav Mekhanoshin }; 3407f37794eSStanislav Mekhanoshin static const TableEntry tbl_sin[] = { 3417f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3427f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3437f37794eSStanislav Mekhanoshin }; 3447f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinh[] = { 3457f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3467f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3477f37794eSStanislav Mekhanoshin }; 3487f37794eSStanislav Mekhanoshin static const TableEntry tbl_sinpi[] = { 3497f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3507f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3517f37794eSStanislav Mekhanoshin }; 3527f37794eSStanislav Mekhanoshin static const TableEntry tbl_sqrt[] = { 3537f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3547f37794eSStanislav Mekhanoshin {1.0, 1.0}, 3557f37794eSStanislav Mekhanoshin {MATH_SQRT2, 2.0} 3567f37794eSStanislav Mekhanoshin }; 3577f37794eSStanislav Mekhanoshin static const TableEntry tbl_tan[] = { 3587f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3597f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3607f37794eSStanislav Mekhanoshin }; 3617f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanh[] = { 3627f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3637f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3647f37794eSStanislav Mekhanoshin }; 3657f37794eSStanislav Mekhanoshin static const TableEntry tbl_tanpi[] = { 3667f37794eSStanislav Mekhanoshin {0.0, 0.0}, 3677f37794eSStanislav Mekhanoshin {-0.0, -0.0} 3687f37794eSStanislav Mekhanoshin }; 3697f37794eSStanislav Mekhanoshin static const TableEntry tbl_tgamma[] = { 3707f37794eSStanislav Mekhanoshin {1.0, 1.0}, 3717f37794eSStanislav Mekhanoshin {1.0, 2.0}, 3727f37794eSStanislav Mekhanoshin {2.0, 3.0}, 3737f37794eSStanislav Mekhanoshin {6.0, 4.0} 3747f37794eSStanislav Mekhanoshin }; 3757f37794eSStanislav Mekhanoshin 3767f37794eSStanislav Mekhanoshin static bool HasNative(AMDGPULibFunc::EFuncId id) { 3777f37794eSStanislav Mekhanoshin switch(id) { 3787f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_DIVIDE: 3797f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COS: 3807f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP: 3817f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP2: 3827f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP10: 3837f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG: 3847f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG2: 3857f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG10: 3867f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POWR: 3877f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RECIP: 3887f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RSQRT: 3897f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SIN: 3907f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINCOS: 3917f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SQRT: 3927f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TAN: 3937f37794eSStanislav Mekhanoshin return true; 3947f37794eSStanislav Mekhanoshin default:; 3957f37794eSStanislav Mekhanoshin } 3967f37794eSStanislav Mekhanoshin return false; 3977f37794eSStanislav Mekhanoshin } 3987f37794eSStanislav Mekhanoshin 3997f37794eSStanislav Mekhanoshin struct TableRef { 4007f37794eSStanislav Mekhanoshin size_t size; 4017f37794eSStanislav Mekhanoshin const TableEntry *table; // variable size: from 0 to (size - 1) 4027f37794eSStanislav Mekhanoshin 4037f37794eSStanislav Mekhanoshin TableRef() : size(0), table(nullptr) {} 4047f37794eSStanislav Mekhanoshin 4057f37794eSStanislav Mekhanoshin template <size_t N> 4067f37794eSStanislav Mekhanoshin TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {} 4077f37794eSStanislav Mekhanoshin }; 4087f37794eSStanislav Mekhanoshin 4097f37794eSStanislav Mekhanoshin static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { 4107f37794eSStanislav Mekhanoshin switch(id) { 4117f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); 4127f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); 4137f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); 4147f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); 4157f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); 4167f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); 4177f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); 4187f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); 4197f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); 4207f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); 4217f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NCOS: 4227f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); 4237f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); 4247f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); 4257f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); 4267f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); 4277f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); 4287f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NEXP2: 4297f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); 4307f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); 4317f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); 4327f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); 4337f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NLOG2: 4347f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); 4357f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); 4367f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NRSQRT: 4377f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); 4387f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NSIN: 4397f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); 4407f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); 4417f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); 4427f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NSQRT: 4437f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); 4447f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); 4457f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); 4467f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); 4477f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); 4487f37794eSStanislav Mekhanoshin default:; 4497f37794eSStanislav Mekhanoshin } 4507f37794eSStanislav Mekhanoshin return TableRef(); 4517f37794eSStanislav Mekhanoshin } 4527f37794eSStanislav Mekhanoshin 4537f37794eSStanislav Mekhanoshin static inline int getVecSize(const AMDGPULibFunc& FInfo) { 454fc5121a7SYaxun Liu return FInfo.getLeads()[0].VectorSize; 4557f37794eSStanislav Mekhanoshin } 4567f37794eSStanislav Mekhanoshin 4577f37794eSStanislav Mekhanoshin static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { 458fc5121a7SYaxun Liu return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; 4597f37794eSStanislav Mekhanoshin } 4607f37794eSStanislav Mekhanoshin 46113680223SJames Y Knight FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { 4627f37794eSStanislav Mekhanoshin // If we are doing PreLinkOpt, the function is external. So it is safe to 4637f37794eSStanislav Mekhanoshin // use getOrInsertFunction() at this stage. 4647f37794eSStanislav Mekhanoshin 4657f37794eSStanislav Mekhanoshin return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) 4667f37794eSStanislav Mekhanoshin : AMDGPULibFunc::getFunction(M, fInfo); 4677f37794eSStanislav Mekhanoshin } 4687f37794eSStanislav Mekhanoshin 4697f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName, 4707f37794eSStanislav Mekhanoshin FuncInfo *FInfo) { 4717f37794eSStanislav Mekhanoshin return AMDGPULibFunc::parse(FMangledName, *FInfo); 4727f37794eSStanislav Mekhanoshin } 4737f37794eSStanislav Mekhanoshin 4747f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { 4757f37794eSStanislav Mekhanoshin if (auto Op = dyn_cast<FPMathOperator>(CI)) 476629c4115SSanjay Patel if (Op->isFast()) 4777f37794eSStanislav Mekhanoshin return true; 4787f37794eSStanislav Mekhanoshin const Function *F = CI->getParent()->getParent(); 4797f37794eSStanislav Mekhanoshin Attribute Attr = F->getFnAttribute("unsafe-fp-math"); 480d6de1e1aSSerge Guelton return Attr.getValueAsBool(); 4817f37794eSStanislav Mekhanoshin } 4827f37794eSStanislav Mekhanoshin 4837f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { 484902cbcd5SKazu Hirata return AllNative || llvm::is_contained(UseNative, F); 4857f37794eSStanislav Mekhanoshin } 4867f37794eSStanislav Mekhanoshin 4877f37794eSStanislav Mekhanoshin void AMDGPULibCalls::initNativeFuncs() { 4887f37794eSStanislav Mekhanoshin AllNative = useNativeFunc("all") || 4897f37794eSStanislav Mekhanoshin (UseNative.getNumOccurrences() && UseNative.size() == 1 && 4907f37794eSStanislav Mekhanoshin UseNative.begin()->empty()); 4917f37794eSStanislav Mekhanoshin } 4927f37794eSStanislav Mekhanoshin 4937f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { 4947f37794eSStanislav Mekhanoshin bool native_sin = useNativeFunc("sin"); 4957f37794eSStanislav Mekhanoshin bool native_cos = useNativeFunc("cos"); 4967f37794eSStanislav Mekhanoshin 4977f37794eSStanislav Mekhanoshin if (native_sin && native_cos) { 4987f37794eSStanislav Mekhanoshin Module *M = aCI->getModule(); 4997f37794eSStanislav Mekhanoshin Value *opr0 = aCI->getArgOperand(0); 5007f37794eSStanislav Mekhanoshin 5017f37794eSStanislav Mekhanoshin AMDGPULibFunc nf; 502fc5121a7SYaxun Liu nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; 503fc5121a7SYaxun Liu nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; 5047f37794eSStanislav Mekhanoshin 5057f37794eSStanislav Mekhanoshin nf.setPrefix(AMDGPULibFunc::NATIVE); 5067f37794eSStanislav Mekhanoshin nf.setId(AMDGPULibFunc::EI_SIN); 50713680223SJames Y Knight FunctionCallee sinExpr = getFunction(M, nf); 5087f37794eSStanislav Mekhanoshin 5097f37794eSStanislav Mekhanoshin nf.setPrefix(AMDGPULibFunc::NATIVE); 5107f37794eSStanislav Mekhanoshin nf.setId(AMDGPULibFunc::EI_COS); 51113680223SJames Y Knight FunctionCallee cosExpr = getFunction(M, nf); 5127f37794eSStanislav Mekhanoshin if (sinExpr && cosExpr) { 5137f37794eSStanislav Mekhanoshin Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); 5147f37794eSStanislav Mekhanoshin Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); 5157f37794eSStanislav Mekhanoshin new StoreInst(cosval, aCI->getArgOperand(1), aCI); 5167f37794eSStanislav Mekhanoshin 5177f37794eSStanislav Mekhanoshin DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 5187f37794eSStanislav Mekhanoshin << " with native version of sin/cos"); 5197f37794eSStanislav Mekhanoshin 5207f37794eSStanislav Mekhanoshin replaceCall(sinval); 5217f37794eSStanislav Mekhanoshin return true; 5227f37794eSStanislav Mekhanoshin } 5237f37794eSStanislav Mekhanoshin } 5247f37794eSStanislav Mekhanoshin return false; 5257f37794eSStanislav Mekhanoshin } 5267f37794eSStanislav Mekhanoshin 5277f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::useNative(CallInst *aCI) { 5287f37794eSStanislav Mekhanoshin CI = aCI; 5297f37794eSStanislav Mekhanoshin Function *Callee = aCI->getCalledFunction(); 5307f37794eSStanislav Mekhanoshin 5317f37794eSStanislav Mekhanoshin FuncInfo FInfo; 532fc5121a7SYaxun Liu if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() || 5337f37794eSStanislav Mekhanoshin FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 534fc5121a7SYaxun Liu getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || 5357f37794eSStanislav Mekhanoshin !(AllNative || useNativeFunc(FInfo.getName()))) { 5367f37794eSStanislav Mekhanoshin return false; 5377f37794eSStanislav Mekhanoshin } 5387f37794eSStanislav Mekhanoshin 5397f37794eSStanislav Mekhanoshin if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) 5407f37794eSStanislav Mekhanoshin return sincosUseNative(aCI, FInfo); 5417f37794eSStanislav Mekhanoshin 5427f37794eSStanislav Mekhanoshin FInfo.setPrefix(AMDGPULibFunc::NATIVE); 54313680223SJames Y Knight FunctionCallee F = getFunction(aCI->getModule(), FInfo); 5447f37794eSStanislav Mekhanoshin if (!F) 5457f37794eSStanislav Mekhanoshin return false; 5467f37794eSStanislav Mekhanoshin 5477f37794eSStanislav Mekhanoshin aCI->setCalledFunction(F); 5487f37794eSStanislav Mekhanoshin DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 5497f37794eSStanislav Mekhanoshin << " with native version"); 5507f37794eSStanislav Mekhanoshin return true; 5517f37794eSStanislav Mekhanoshin } 5527f37794eSStanislav Mekhanoshin 553fc5121a7SYaxun Liu // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe 554fc5121a7SYaxun Liu // builtin, with appended type size and alignment arguments, where 2 or 4 555fc5121a7SYaxun Liu // indicates the original number of arguments. The library has optimized version 556fc5121a7SYaxun Liu // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same 557fc5121a7SYaxun Liu // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N 558fc5121a7SYaxun Liu // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., 559fc5121a7SYaxun Liu // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. 560fc5121a7SYaxun Liu bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 561fc5121a7SYaxun Liu FuncInfo &FInfo) { 562fc5121a7SYaxun Liu auto *Callee = CI->getCalledFunction(); 563fc5121a7SYaxun Liu if (!Callee->isDeclaration()) 564fc5121a7SYaxun Liu return false; 565fc5121a7SYaxun Liu 566fc5121a7SYaxun Liu assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); 567fc5121a7SYaxun Liu auto *M = Callee->getParent(); 568fc5121a7SYaxun Liu auto &Ctx = M->getContext(); 569adcd0268SBenjamin Kramer std::string Name = std::string(Callee->getName()); 570fc5121a7SYaxun Liu auto NumArg = CI->getNumArgOperands(); 571fc5121a7SYaxun Liu if (NumArg != 4 && NumArg != 6) 572fc5121a7SYaxun Liu return false; 573fc5121a7SYaxun Liu auto *PacketSize = CI->getArgOperand(NumArg - 2); 574fc5121a7SYaxun Liu auto *PacketAlign = CI->getArgOperand(NumArg - 1); 575fc5121a7SYaxun Liu if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign)) 576fc5121a7SYaxun Liu return false; 577fc5121a7SYaxun Liu unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue(); 57887e2751cSGuillaume Chatelet Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue(); 57987e2751cSGuillaume Chatelet if (Alignment != Size) 580fc5121a7SYaxun Liu return false; 581fc5121a7SYaxun Liu 582fc5121a7SYaxun Liu Type *PtrElemTy; 583fc5121a7SYaxun Liu if (Size <= 8) 584fc5121a7SYaxun Liu PtrElemTy = Type::getIntNTy(Ctx, Size * 8); 585fc5121a7SYaxun Liu else 586aad93654SChristopher Tetreault PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8); 587fc5121a7SYaxun Liu unsigned PtrArgLoc = CI->getNumArgOperands() - 3; 588fc5121a7SYaxun Liu auto PtrArg = CI->getArgOperand(PtrArgLoc); 589fc5121a7SYaxun Liu unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace(); 590fc5121a7SYaxun Liu auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS); 591fc5121a7SYaxun Liu 592fc5121a7SYaxun Liu SmallVector<llvm::Type *, 6> ArgTys; 593fc5121a7SYaxun Liu for (unsigned I = 0; I != PtrArgLoc; ++I) 594fc5121a7SYaxun Liu ArgTys.push_back(CI->getArgOperand(I)->getType()); 595fc5121a7SYaxun Liu ArgTys.push_back(PtrTy); 596fc5121a7SYaxun Liu 597fc5121a7SYaxun Liu Name = Name + "_" + std::to_string(Size); 598fc5121a7SYaxun Liu auto *FTy = FunctionType::get(Callee->getReturnType(), 599fc5121a7SYaxun Liu ArrayRef<Type *>(ArgTys), false); 600fc5121a7SYaxun Liu AMDGPULibFunc NewLibFunc(Name, FTy); 60113680223SJames Y Knight FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); 602fc5121a7SYaxun Liu if (!F) 603fc5121a7SYaxun Liu return false; 604fc5121a7SYaxun Liu 605fc5121a7SYaxun Liu auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); 606fc5121a7SYaxun Liu SmallVector<Value *, 6> Args; 607fc5121a7SYaxun Liu for (unsigned I = 0; I != PtrArgLoc; ++I) 608fc5121a7SYaxun Liu Args.push_back(CI->getArgOperand(I)); 609fc5121a7SYaxun Liu Args.push_back(BCast); 610fc5121a7SYaxun Liu 611fc5121a7SYaxun Liu auto *NCI = B.CreateCall(F, Args); 612fc5121a7SYaxun Liu NCI->setAttributes(CI->getAttributes()); 613fc5121a7SYaxun Liu CI->replaceAllUsesWith(NCI); 614fc5121a7SYaxun Liu CI->dropAllReferences(); 615fc5121a7SYaxun Liu CI->eraseFromParent(); 616fc5121a7SYaxun Liu 617fc5121a7SYaxun Liu return true; 618fc5121a7SYaxun Liu } 619fc5121a7SYaxun Liu 6207f37794eSStanislav Mekhanoshin // This function returns false if no change; return true otherwise. 6217f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { 6227f37794eSStanislav Mekhanoshin this->CI = CI; 6237f37794eSStanislav Mekhanoshin Function *Callee = CI->getCalledFunction(); 6247f37794eSStanislav Mekhanoshin 6257f37794eSStanislav Mekhanoshin // Ignore indirect calls. 6267f37794eSStanislav Mekhanoshin if (Callee == 0) return false; 6277f37794eSStanislav Mekhanoshin 6287f37794eSStanislav Mekhanoshin BasicBlock *BB = CI->getParent(); 6297f37794eSStanislav Mekhanoshin LLVMContext &Context = CI->getParent()->getContext(); 6307f37794eSStanislav Mekhanoshin IRBuilder<> B(Context); 6317f37794eSStanislav Mekhanoshin 6327f37794eSStanislav Mekhanoshin // Set the builder to the instruction after the call. 6337f37794eSStanislav Mekhanoshin B.SetInsertPoint(BB, CI->getIterator()); 6347f37794eSStanislav Mekhanoshin 6357f37794eSStanislav Mekhanoshin // Copy fast flags from the original call. 6367f37794eSStanislav Mekhanoshin if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI)) 6377f37794eSStanislav Mekhanoshin B.setFastMathFlags(FPOp->getFastMathFlags()); 6387f37794eSStanislav Mekhanoshin 639a9191c84SStanislav Mekhanoshin switch (Callee->getIntrinsicID()) { 640a9191c84SStanislav Mekhanoshin default: 641a9191c84SStanislav Mekhanoshin break; 642a9191c84SStanislav Mekhanoshin case Intrinsic::amdgcn_wavefrontsize: 643a9191c84SStanislav Mekhanoshin return !EnablePreLink && fold_wavefrontsize(CI, B); 644a9191c84SStanislav Mekhanoshin } 645a9191c84SStanislav Mekhanoshin 646a9191c84SStanislav Mekhanoshin FuncInfo FInfo; 647a9191c84SStanislav Mekhanoshin if (!parseFunctionName(Callee->getName(), &FInfo)) 648a9191c84SStanislav Mekhanoshin return false; 649a9191c84SStanislav Mekhanoshin 650a9191c84SStanislav Mekhanoshin // Further check the number of arguments to see if they match. 651a9191c84SStanislav Mekhanoshin if (CI->getNumArgOperands() != FInfo.getNumArgs()) 652a9191c84SStanislav Mekhanoshin return false; 653a9191c84SStanislav Mekhanoshin 6547f37794eSStanislav Mekhanoshin if (TDOFold(CI, FInfo)) 6557f37794eSStanislav Mekhanoshin return true; 6567f37794eSStanislav Mekhanoshin 6577f37794eSStanislav Mekhanoshin // Under unsafe-math, evaluate calls if possible. 6587f37794eSStanislav Mekhanoshin // According to Brian Sumner, we can do this for all f32 function calls 6597f37794eSStanislav Mekhanoshin // using host's double function calls. 6607f37794eSStanislav Mekhanoshin if (isUnsafeMath(CI) && evaluateCall(CI, FInfo)) 6617f37794eSStanislav Mekhanoshin return true; 6627f37794eSStanislav Mekhanoshin 663*dc6e8dfdSJacob Lambert // Specialized optimizations for each function call 6647f37794eSStanislav Mekhanoshin switch (FInfo.getId()) { 6657f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RECIP: 6667f37794eSStanislav Mekhanoshin // skip vector function 6677f37794eSStanislav Mekhanoshin assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE || 6687f37794eSStanislav Mekhanoshin FInfo.getPrefix() == AMDGPULibFunc::HALF) && 6697f37794eSStanislav Mekhanoshin "recip must be an either native or half function"); 6707f37794eSStanislav Mekhanoshin return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo); 6717f37794eSStanislav Mekhanoshin 6727f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_DIVIDE: 6737f37794eSStanislav Mekhanoshin // skip vector function 6747f37794eSStanislav Mekhanoshin assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE || 6757f37794eSStanislav Mekhanoshin FInfo.getPrefix() == AMDGPULibFunc::HALF) && 6767f37794eSStanislav Mekhanoshin "divide must be an either native or half function"); 6777f37794eSStanislav Mekhanoshin return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo); 6787f37794eSStanislav Mekhanoshin 6797f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POW: 6807f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POWR: 6817f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POWN: 6827f37794eSStanislav Mekhanoshin return fold_pow(CI, B, FInfo); 6837f37794eSStanislav Mekhanoshin 6847f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ROOTN: 6857f37794eSStanislav Mekhanoshin // skip vector function 6867f37794eSStanislav Mekhanoshin return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo); 6877f37794eSStanislav Mekhanoshin 6887f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_FMA: 6897f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_MAD: 6907f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_NFMA: 6917f37794eSStanislav Mekhanoshin // skip vector function 6927f37794eSStanislav Mekhanoshin return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo); 6937f37794eSStanislav Mekhanoshin 6947f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SQRT: 6957f37794eSStanislav Mekhanoshin return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo); 6967f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COS: 6977f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SIN: 6987f37794eSStanislav Mekhanoshin if ((getArgType(FInfo) == AMDGPULibFunc::F32 || 6997f37794eSStanislav Mekhanoshin getArgType(FInfo) == AMDGPULibFunc::F64) 7007f37794eSStanislav Mekhanoshin && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX)) 7017f37794eSStanislav Mekhanoshin return fold_sincos(CI, B, AA); 7027f37794eSStanislav Mekhanoshin 7037f37794eSStanislav Mekhanoshin break; 704fc5121a7SYaxun Liu case AMDGPULibFunc::EI_READ_PIPE_2: 705fc5121a7SYaxun Liu case AMDGPULibFunc::EI_READ_PIPE_4: 706fc5121a7SYaxun Liu case AMDGPULibFunc::EI_WRITE_PIPE_2: 707fc5121a7SYaxun Liu case AMDGPULibFunc::EI_WRITE_PIPE_4: 708fc5121a7SYaxun Liu return fold_read_write_pipe(CI, B, FInfo); 7097f37794eSStanislav Mekhanoshin 7107f37794eSStanislav Mekhanoshin default: 7117f37794eSStanislav Mekhanoshin break; 7127f37794eSStanislav Mekhanoshin } 7137f37794eSStanislav Mekhanoshin 7147f37794eSStanislav Mekhanoshin return false; 7157f37794eSStanislav Mekhanoshin } 7167f37794eSStanislav Mekhanoshin 7177f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { 7187f37794eSStanislav Mekhanoshin // Table-Driven optimization 7197f37794eSStanislav Mekhanoshin const TableRef tr = getOptTable(FInfo.getId()); 7207f37794eSStanislav Mekhanoshin if (tr.size==0) 7217f37794eSStanislav Mekhanoshin return false; 7227f37794eSStanislav Mekhanoshin 7237f37794eSStanislav Mekhanoshin int const sz = (int)tr.size; 7247f37794eSStanislav Mekhanoshin const TableEntry * const ftbl = tr.table; 7257f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 7267f37794eSStanislav Mekhanoshin 7277f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) > 1) { 7287f37794eSStanislav Mekhanoshin if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { 7297f37794eSStanislav Mekhanoshin SmallVector<double, 0> DVal; 7307f37794eSStanislav Mekhanoshin for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { 7317f37794eSStanislav Mekhanoshin ConstantFP *eltval = dyn_cast<ConstantFP>( 7327f37794eSStanislav Mekhanoshin CV->getElementAsConstant((unsigned)eltNo)); 7337f37794eSStanislav Mekhanoshin assert(eltval && "Non-FP arguments in math function!"); 7347f37794eSStanislav Mekhanoshin bool found = false; 7357f37794eSStanislav Mekhanoshin for (int i=0; i < sz; ++i) { 7367f37794eSStanislav Mekhanoshin if (eltval->isExactlyValue(ftbl[i].input)) { 7377f37794eSStanislav Mekhanoshin DVal.push_back(ftbl[i].result); 7387f37794eSStanislav Mekhanoshin found = true; 7397f37794eSStanislav Mekhanoshin break; 7407f37794eSStanislav Mekhanoshin } 7417f37794eSStanislav Mekhanoshin } 7427f37794eSStanislav Mekhanoshin if (!found) { 7437f37794eSStanislav Mekhanoshin // This vector constants not handled yet. 7447f37794eSStanislav Mekhanoshin return false; 7457f37794eSStanislav Mekhanoshin } 7467f37794eSStanislav Mekhanoshin } 7477f37794eSStanislav Mekhanoshin LLVMContext &context = CI->getParent()->getParent()->getContext(); 7487f37794eSStanislav Mekhanoshin Constant *nval; 7497f37794eSStanislav Mekhanoshin if (getArgType(FInfo) == AMDGPULibFunc::F32) { 7507f37794eSStanislav Mekhanoshin SmallVector<float, 0> FVal; 7517f37794eSStanislav Mekhanoshin for (unsigned i = 0; i < DVal.size(); ++i) { 7527f37794eSStanislav Mekhanoshin FVal.push_back((float)DVal[i]); 7537f37794eSStanislav Mekhanoshin } 7547f37794eSStanislav Mekhanoshin ArrayRef<float> tmp(FVal); 7557f37794eSStanislav Mekhanoshin nval = ConstantDataVector::get(context, tmp); 7567f37794eSStanislav Mekhanoshin } else { // F64 7577f37794eSStanislav Mekhanoshin ArrayRef<double> tmp(DVal); 7587f37794eSStanislav Mekhanoshin nval = ConstantDataVector::get(context, tmp); 7597f37794eSStanislav Mekhanoshin } 760d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 7617f37794eSStanislav Mekhanoshin replaceCall(nval); 7627f37794eSStanislav Mekhanoshin return true; 7637f37794eSStanislav Mekhanoshin } 7647f37794eSStanislav Mekhanoshin } else { 7657f37794eSStanislav Mekhanoshin // Scalar version 7667f37794eSStanislav Mekhanoshin if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 7677f37794eSStanislav Mekhanoshin for (int i = 0; i < sz; ++i) { 7687f37794eSStanislav Mekhanoshin if (CF->isExactlyValue(ftbl[i].input)) { 7697f37794eSStanislav Mekhanoshin Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); 770d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 7717f37794eSStanislav Mekhanoshin replaceCall(nval); 7727f37794eSStanislav Mekhanoshin return true; 7737f37794eSStanislav Mekhanoshin } 7747f37794eSStanislav Mekhanoshin } 7757f37794eSStanislav Mekhanoshin } 7767f37794eSStanislav Mekhanoshin } 7777f37794eSStanislav Mekhanoshin 7787f37794eSStanislav Mekhanoshin return false; 7797f37794eSStanislav Mekhanoshin } 7807f37794eSStanislav Mekhanoshin 7817f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { 7827f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 7837f37794eSStanislav Mekhanoshin if (getArgType(FInfo) != AMDGPULibFunc::F32 || 7847f37794eSStanislav Mekhanoshin FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 7857f37794eSStanislav Mekhanoshin !HasNative(FInfo.getId())) 7867f37794eSStanislav Mekhanoshin return false; 7877f37794eSStanislav Mekhanoshin 7887f37794eSStanislav Mekhanoshin AMDGPULibFunc nf = FInfo; 7897f37794eSStanislav Mekhanoshin nf.setPrefix(AMDGPULibFunc::NATIVE); 79013680223SJames Y Knight if (FunctionCallee FPExpr = getFunction(M, nf)) { 791d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); 7927f37794eSStanislav Mekhanoshin 7937f37794eSStanislav Mekhanoshin CI->setCalledFunction(FPExpr); 7947f37794eSStanislav Mekhanoshin 795d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << *CI << '\n'); 7967f37794eSStanislav Mekhanoshin 7977f37794eSStanislav Mekhanoshin return true; 7987f37794eSStanislav Mekhanoshin } 7997f37794eSStanislav Mekhanoshin return false; 8007f37794eSStanislav Mekhanoshin } 8017f37794eSStanislav Mekhanoshin 8027f37794eSStanislav Mekhanoshin // [native_]half_recip(c) ==> 1.0/c 8037f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, 8047f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 8057f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 8067f37794eSStanislav Mekhanoshin if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 8077f37794eSStanislav Mekhanoshin // Just create a normal div. Later, InstCombine will be able 8087f37794eSStanislav Mekhanoshin // to compute the divide into a constant (avoid check float infinity 8097f37794eSStanislav Mekhanoshin // or subnormal at this point). 8107f37794eSStanislav Mekhanoshin Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0), 8117f37794eSStanislav Mekhanoshin opr0, 8127f37794eSStanislav Mekhanoshin "recip2div"); 813d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 8147f37794eSStanislav Mekhanoshin replaceCall(nval); 8157f37794eSStanislav Mekhanoshin return true; 8167f37794eSStanislav Mekhanoshin } 8177f37794eSStanislav Mekhanoshin return false; 8187f37794eSStanislav Mekhanoshin } 8197f37794eSStanislav Mekhanoshin 8207f37794eSStanislav Mekhanoshin // [native_]half_divide(x, c) ==> x/c 8217f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B, 8227f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 8237f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 8247f37794eSStanislav Mekhanoshin Value *opr1 = CI->getArgOperand(1); 8257f37794eSStanislav Mekhanoshin ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); 8267f37794eSStanislav Mekhanoshin ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); 8277f37794eSStanislav Mekhanoshin 8287f37794eSStanislav Mekhanoshin if ((CF0 && CF1) || // both are constants 8297f37794eSStanislav Mekhanoshin (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32))) 8307f37794eSStanislav Mekhanoshin // CF1 is constant && f32 divide 8317f37794eSStanislav Mekhanoshin { 8327f37794eSStanislav Mekhanoshin Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0), 8337f37794eSStanislav Mekhanoshin opr1, "__div2recip"); 8347f37794eSStanislav Mekhanoshin Value *nval = B.CreateFMul(opr0, nval1, "__div2mul"); 8357f37794eSStanislav Mekhanoshin replaceCall(nval); 8367f37794eSStanislav Mekhanoshin return true; 8377f37794eSStanislav Mekhanoshin } 8387f37794eSStanislav Mekhanoshin return false; 8397f37794eSStanislav Mekhanoshin } 8407f37794eSStanislav Mekhanoshin 8417f37794eSStanislav Mekhanoshin namespace llvm { 8427f37794eSStanislav Mekhanoshin static double log2(double V) { 843ae79a2c3SDavid Tenty #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L 8447f37794eSStanislav Mekhanoshin return ::log2(V); 8457f37794eSStanislav Mekhanoshin #else 846c57a9dc4SEvandro Menezes return log(V) / numbers::ln2; 8477f37794eSStanislav Mekhanoshin #endif 8487f37794eSStanislav Mekhanoshin } 8497f37794eSStanislav Mekhanoshin } 8507f37794eSStanislav Mekhanoshin 8517f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, 8527f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 8537f37794eSStanislav Mekhanoshin assert((FInfo.getId() == AMDGPULibFunc::EI_POW || 8547f37794eSStanislav Mekhanoshin FInfo.getId() == AMDGPULibFunc::EI_POWR || 8557f37794eSStanislav Mekhanoshin FInfo.getId() == AMDGPULibFunc::EI_POWN) && 8567f37794eSStanislav Mekhanoshin "fold_pow: encounter a wrong function call"); 8577f37794eSStanislav Mekhanoshin 8587f37794eSStanislav Mekhanoshin Value *opr0, *opr1; 8597f37794eSStanislav Mekhanoshin ConstantFP *CF; 8607f37794eSStanislav Mekhanoshin ConstantInt *CINT; 8617f37794eSStanislav Mekhanoshin ConstantAggregateZero *CZero; 8627f37794eSStanislav Mekhanoshin Type *eltType; 8637f37794eSStanislav Mekhanoshin 8647f37794eSStanislav Mekhanoshin opr0 = CI->getArgOperand(0); 8657f37794eSStanislav Mekhanoshin opr1 = CI->getArgOperand(1); 8667f37794eSStanislav Mekhanoshin CZero = dyn_cast<ConstantAggregateZero>(opr1); 8677f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) == 1) { 8687f37794eSStanislav Mekhanoshin eltType = opr0->getType(); 8697f37794eSStanislav Mekhanoshin CF = dyn_cast<ConstantFP>(opr1); 8707f37794eSStanislav Mekhanoshin CINT = dyn_cast<ConstantInt>(opr1); 8717f37794eSStanislav Mekhanoshin } else { 8727f37794eSStanislav Mekhanoshin VectorType *VTy = dyn_cast<VectorType>(opr0->getType()); 8737f37794eSStanislav Mekhanoshin assert(VTy && "Oprand of vector function should be of vectortype"); 8747f37794eSStanislav Mekhanoshin eltType = VTy->getElementType(); 8757f37794eSStanislav Mekhanoshin ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1); 8767f37794eSStanislav Mekhanoshin 8777f37794eSStanislav Mekhanoshin // Now, only Handle vector const whose elements have the same value. 8787f37794eSStanislav Mekhanoshin CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr; 8797f37794eSStanislav Mekhanoshin CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr; 8807f37794eSStanislav Mekhanoshin } 8817f37794eSStanislav Mekhanoshin 8827f37794eSStanislav Mekhanoshin // No unsafe math , no constant argument, do nothing 8837f37794eSStanislav Mekhanoshin if (!isUnsafeMath(CI) && !CF && !CINT && !CZero) 8847f37794eSStanislav Mekhanoshin return false; 8857f37794eSStanislav Mekhanoshin 8867f37794eSStanislav Mekhanoshin // 0x1111111 means that we don't do anything for this call. 8877f37794eSStanislav Mekhanoshin int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); 8887f37794eSStanislav Mekhanoshin 8897f37794eSStanislav Mekhanoshin if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) { 8907f37794eSStanislav Mekhanoshin // pow/powr/pown(x, 0) == 1 891d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n"); 8927f37794eSStanislav Mekhanoshin Constant *cnval = ConstantFP::get(eltType, 1.0); 8937f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) > 1) { 8947f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 8957f37794eSStanislav Mekhanoshin } 8967f37794eSStanislav Mekhanoshin replaceCall(cnval); 8977f37794eSStanislav Mekhanoshin return true; 8987f37794eSStanislav Mekhanoshin } 8997f37794eSStanislav Mekhanoshin if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { 9007f37794eSStanislav Mekhanoshin // pow/powr/pown(x, 1.0) = x 901d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n"); 9027f37794eSStanislav Mekhanoshin replaceCall(opr0); 9037f37794eSStanislav Mekhanoshin return true; 9047f37794eSStanislav Mekhanoshin } 9057f37794eSStanislav Mekhanoshin if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { 9067f37794eSStanislav Mekhanoshin // pow/powr/pown(x, 2.0) = x*x 907d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0 908d34e60caSNicola Zaghen << "\n"); 9097f37794eSStanislav Mekhanoshin Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); 9107f37794eSStanislav Mekhanoshin replaceCall(nval); 9117f37794eSStanislav Mekhanoshin return true; 9127f37794eSStanislav Mekhanoshin } 9137f37794eSStanislav Mekhanoshin if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { 9147f37794eSStanislav Mekhanoshin // pow/powr/pown(x, -1.0) = 1.0/x 915d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n"); 9167f37794eSStanislav Mekhanoshin Constant *cnval = ConstantFP::get(eltType, 1.0); 9177f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) > 1) { 9187f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 9197f37794eSStanislav Mekhanoshin } 9207f37794eSStanislav Mekhanoshin Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); 9217f37794eSStanislav Mekhanoshin replaceCall(nval); 9227f37794eSStanislav Mekhanoshin return true; 9237f37794eSStanislav Mekhanoshin } 9247f37794eSStanislav Mekhanoshin 9257f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 9267f37794eSStanislav Mekhanoshin if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { 9277f37794eSStanislav Mekhanoshin // pow[r](x, [-]0.5) = sqrt(x) 9287f37794eSStanislav Mekhanoshin bool issqrt = CF->isExactlyValue(0.5); 92913680223SJames Y Knight if (FunctionCallee FPExpr = 93013680223SJames Y Knight getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT 93113680223SJames Y Knight : AMDGPULibFunc::EI_RSQRT, 93213680223SJames Y Knight FInfo))) { 933d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " 9347f37794eSStanislav Mekhanoshin << FInfo.getName().c_str() << "(" << *opr0 << ")\n"); 9357f37794eSStanislav Mekhanoshin Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" 9367f37794eSStanislav Mekhanoshin : "__pow2rsqrt"); 9377f37794eSStanislav Mekhanoshin replaceCall(nval); 9387f37794eSStanislav Mekhanoshin return true; 9397f37794eSStanislav Mekhanoshin } 9407f37794eSStanislav Mekhanoshin } 9417f37794eSStanislav Mekhanoshin 9427f37794eSStanislav Mekhanoshin if (!isUnsafeMath(CI)) 9437f37794eSStanislav Mekhanoshin return false; 9447f37794eSStanislav Mekhanoshin 9457f37794eSStanislav Mekhanoshin // Unsafe Math optimization 9467f37794eSStanislav Mekhanoshin 9477f37794eSStanislav Mekhanoshin // Remember that ci_opr1 is set if opr1 is integral 9487f37794eSStanislav Mekhanoshin if (CF) { 9497f37794eSStanislav Mekhanoshin double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) 9507f37794eSStanislav Mekhanoshin ? (double)CF->getValueAPF().convertToFloat() 9517f37794eSStanislav Mekhanoshin : CF->getValueAPF().convertToDouble(); 9527f37794eSStanislav Mekhanoshin int ival = (int)dval; 9537f37794eSStanislav Mekhanoshin if ((double)ival == dval) { 9547f37794eSStanislav Mekhanoshin ci_opr1 = ival; 9557f37794eSStanislav Mekhanoshin } else 9567f37794eSStanislav Mekhanoshin ci_opr1 = 0x11111111; 9577f37794eSStanislav Mekhanoshin } 9587f37794eSStanislav Mekhanoshin 9597f37794eSStanislav Mekhanoshin // pow/powr/pown(x, c) = [1/](x*x*..x); where 9607f37794eSStanislav Mekhanoshin // trunc(c) == c && the number of x == c && |c| <= 12 9617f37794eSStanislav Mekhanoshin unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; 9627f37794eSStanislav Mekhanoshin if (abs_opr1 <= 12) { 9637f37794eSStanislav Mekhanoshin Constant *cnval; 9647f37794eSStanislav Mekhanoshin Value *nval; 9657f37794eSStanislav Mekhanoshin if (abs_opr1 == 0) { 9667f37794eSStanislav Mekhanoshin cnval = ConstantFP::get(eltType, 1.0); 9677f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) > 1) { 9687f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 9697f37794eSStanislav Mekhanoshin } 9707f37794eSStanislav Mekhanoshin nval = cnval; 9717f37794eSStanislav Mekhanoshin } else { 9727f37794eSStanislav Mekhanoshin Value *valx2 = nullptr; 9737f37794eSStanislav Mekhanoshin nval = nullptr; 9747f37794eSStanislav Mekhanoshin while (abs_opr1 > 0) { 9757f37794eSStanislav Mekhanoshin valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; 9767f37794eSStanislav Mekhanoshin if (abs_opr1 & 1) { 9777f37794eSStanislav Mekhanoshin nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; 9787f37794eSStanislav Mekhanoshin } 9797f37794eSStanislav Mekhanoshin abs_opr1 >>= 1; 9807f37794eSStanislav Mekhanoshin } 9817f37794eSStanislav Mekhanoshin } 9827f37794eSStanislav Mekhanoshin 9837f37794eSStanislav Mekhanoshin if (ci_opr1 < 0) { 9847f37794eSStanislav Mekhanoshin cnval = ConstantFP::get(eltType, 1.0); 9857f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) > 1) { 9867f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 9877f37794eSStanislav Mekhanoshin } 9887f37794eSStanislav Mekhanoshin nval = B.CreateFDiv(cnval, nval, "__1powprod"); 9897f37794eSStanislav Mekhanoshin } 990d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " 991d34e60caSNicola Zaghen << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 992d34e60caSNicola Zaghen << ")\n"); 9937f37794eSStanislav Mekhanoshin replaceCall(nval); 9947f37794eSStanislav Mekhanoshin return true; 9957f37794eSStanislav Mekhanoshin } 9967f37794eSStanislav Mekhanoshin 9977f37794eSStanislav Mekhanoshin // powr ---> exp2(y * log2(x)) 9987f37794eSStanislav Mekhanoshin // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) 99913680223SJames Y Knight FunctionCallee ExpExpr = 100013680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); 10017f37794eSStanislav Mekhanoshin if (!ExpExpr) 10027f37794eSStanislav Mekhanoshin return false; 10037f37794eSStanislav Mekhanoshin 10047f37794eSStanislav Mekhanoshin bool needlog = false; 10057f37794eSStanislav Mekhanoshin bool needabs = false; 10067f37794eSStanislav Mekhanoshin bool needcopysign = false; 10077f37794eSStanislav Mekhanoshin Constant *cnval = nullptr; 10087f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) == 1) { 10097f37794eSStanislav Mekhanoshin CF = dyn_cast<ConstantFP>(opr0); 10107f37794eSStanislav Mekhanoshin 10117f37794eSStanislav Mekhanoshin if (CF) { 10127f37794eSStanislav Mekhanoshin double V = (getArgType(FInfo) == AMDGPULibFunc::F32) 10137f37794eSStanislav Mekhanoshin ? (double)CF->getValueAPF().convertToFloat() 10147f37794eSStanislav Mekhanoshin : CF->getValueAPF().convertToDouble(); 10157f37794eSStanislav Mekhanoshin 10167f37794eSStanislav Mekhanoshin V = log2(std::abs(V)); 10177f37794eSStanislav Mekhanoshin cnval = ConstantFP::get(eltType, V); 10187f37794eSStanislav Mekhanoshin needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && 10197f37794eSStanislav Mekhanoshin CF->isNegative(); 10207f37794eSStanislav Mekhanoshin } else { 10217f37794eSStanislav Mekhanoshin needlog = true; 10227f37794eSStanislav Mekhanoshin needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && 10237f37794eSStanislav Mekhanoshin (!CF || CF->isNegative()); 10247f37794eSStanislav Mekhanoshin } 10257f37794eSStanislav Mekhanoshin } else { 10267f37794eSStanislav Mekhanoshin ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); 10277f37794eSStanislav Mekhanoshin 10287f37794eSStanislav Mekhanoshin if (!CDV) { 10297f37794eSStanislav Mekhanoshin needlog = true; 10307f37794eSStanislav Mekhanoshin needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 10317f37794eSStanislav Mekhanoshin } else { 10327f37794eSStanislav Mekhanoshin assert ((int)CDV->getNumElements() == getVecSize(FInfo) && 10337f37794eSStanislav Mekhanoshin "Wrong vector size detected"); 10347f37794eSStanislav Mekhanoshin 10357f37794eSStanislav Mekhanoshin SmallVector<double, 0> DVal; 10367f37794eSStanislav Mekhanoshin for (int i=0; i < getVecSize(FInfo); ++i) { 10377f37794eSStanislav Mekhanoshin double V = (getArgType(FInfo) == AMDGPULibFunc::F32) 10387f37794eSStanislav Mekhanoshin ? (double)CDV->getElementAsFloat(i) 10397f37794eSStanislav Mekhanoshin : CDV->getElementAsDouble(i); 10407f37794eSStanislav Mekhanoshin if (V < 0.0) needcopysign = true; 10417f37794eSStanislav Mekhanoshin V = log2(std::abs(V)); 10427f37794eSStanislav Mekhanoshin DVal.push_back(V); 10437f37794eSStanislav Mekhanoshin } 10447f37794eSStanislav Mekhanoshin if (getArgType(FInfo) == AMDGPULibFunc::F32) { 10457f37794eSStanislav Mekhanoshin SmallVector<float, 0> FVal; 10467f37794eSStanislav Mekhanoshin for (unsigned i=0; i < DVal.size(); ++i) { 10477f37794eSStanislav Mekhanoshin FVal.push_back((float)DVal[i]); 10487f37794eSStanislav Mekhanoshin } 10497f37794eSStanislav Mekhanoshin ArrayRef<float> tmp(FVal); 10507f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::get(M->getContext(), tmp); 10517f37794eSStanislav Mekhanoshin } else { 10527f37794eSStanislav Mekhanoshin ArrayRef<double> tmp(DVal); 10537f37794eSStanislav Mekhanoshin cnval = ConstantDataVector::get(M->getContext(), tmp); 10547f37794eSStanislav Mekhanoshin } 10557f37794eSStanislav Mekhanoshin } 10567f37794eSStanislav Mekhanoshin } 10577f37794eSStanislav Mekhanoshin 10587f37794eSStanislav Mekhanoshin if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { 10597f37794eSStanislav Mekhanoshin // We cannot handle corner cases for a general pow() function, give up 10607f37794eSStanislav Mekhanoshin // unless y is a constant integral value. Then proceed as if it were pown. 10617f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) == 1) { 10627f37794eSStanislav Mekhanoshin if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) { 10637f37794eSStanislav Mekhanoshin double y = (getArgType(FInfo) == AMDGPULibFunc::F32) 10647f37794eSStanislav Mekhanoshin ? (double)CF->getValueAPF().convertToFloat() 10657f37794eSStanislav Mekhanoshin : CF->getValueAPF().convertToDouble(); 10667f37794eSStanislav Mekhanoshin if (y != (double)(int64_t)y) 10677f37794eSStanislav Mekhanoshin return false; 10687f37794eSStanislav Mekhanoshin } else 10697f37794eSStanislav Mekhanoshin return false; 10707f37794eSStanislav Mekhanoshin } else { 10717f37794eSStanislav Mekhanoshin if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) { 10727f37794eSStanislav Mekhanoshin for (int i=0; i < getVecSize(FInfo); ++i) { 10737f37794eSStanislav Mekhanoshin double y = (getArgType(FInfo) == AMDGPULibFunc::F32) 10747f37794eSStanislav Mekhanoshin ? (double)CDV->getElementAsFloat(i) 10757f37794eSStanislav Mekhanoshin : CDV->getElementAsDouble(i); 10767f37794eSStanislav Mekhanoshin if (y != (double)(int64_t)y) 10777f37794eSStanislav Mekhanoshin return false; 10787f37794eSStanislav Mekhanoshin } 10797f37794eSStanislav Mekhanoshin } else 10807f37794eSStanislav Mekhanoshin return false; 10817f37794eSStanislav Mekhanoshin } 10827f37794eSStanislav Mekhanoshin } 10837f37794eSStanislav Mekhanoshin 10847f37794eSStanislav Mekhanoshin Value *nval; 10857f37794eSStanislav Mekhanoshin if (needabs) { 108613680223SJames Y Knight FunctionCallee AbsExpr = 108713680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo)); 10887f37794eSStanislav Mekhanoshin if (!AbsExpr) 10897f37794eSStanislav Mekhanoshin return false; 10907f37794eSStanislav Mekhanoshin nval = CreateCallEx(B, AbsExpr, opr0, "__fabs"); 10917f37794eSStanislav Mekhanoshin } else { 10927f37794eSStanislav Mekhanoshin nval = cnval ? cnval : opr0; 10937f37794eSStanislav Mekhanoshin } 10947f37794eSStanislav Mekhanoshin if (needlog) { 109513680223SJames Y Knight FunctionCallee LogExpr = 109613680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); 10977f37794eSStanislav Mekhanoshin if (!LogExpr) 10987f37794eSStanislav Mekhanoshin return false; 10997f37794eSStanislav Mekhanoshin nval = CreateCallEx(B,LogExpr, nval, "__log2"); 11007f37794eSStanislav Mekhanoshin } 11017f37794eSStanislav Mekhanoshin 11027f37794eSStanislav Mekhanoshin if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { 11037f37794eSStanislav Mekhanoshin // convert int(32) to fp(f32 or f64) 11047f37794eSStanislav Mekhanoshin opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); 11057f37794eSStanislav Mekhanoshin } 11067f37794eSStanislav Mekhanoshin nval = B.CreateFMul(opr1, nval, "__ylogx"); 11077f37794eSStanislav Mekhanoshin nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); 11087f37794eSStanislav Mekhanoshin 11097f37794eSStanislav Mekhanoshin if (needcopysign) { 11107f37794eSStanislav Mekhanoshin Value *opr_n; 11117f37794eSStanislav Mekhanoshin Type* rTy = opr0->getType(); 11127f37794eSStanislav Mekhanoshin Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty(); 11137f37794eSStanislav Mekhanoshin Type *nTy = nTyS; 11143254a001SChristopher Tetreault if (const auto *vTy = dyn_cast<FixedVectorType>(rTy)) 11153254a001SChristopher Tetreault nTy = FixedVectorType::get(nTyS, vTy); 11167f37794eSStanislav Mekhanoshin unsigned size = nTy->getScalarSizeInBits(); 11177f37794eSStanislav Mekhanoshin opr_n = CI->getArgOperand(1); 11187f37794eSStanislav Mekhanoshin if (opr_n->getType()->isIntegerTy()) 11197f37794eSStanislav Mekhanoshin opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou"); 11207f37794eSStanislav Mekhanoshin else 11217f37794eSStanislav Mekhanoshin opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); 11227f37794eSStanislav Mekhanoshin 11237f37794eSStanislav Mekhanoshin Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); 11247f37794eSStanislav Mekhanoshin sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); 11257f37794eSStanislav Mekhanoshin nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); 11267f37794eSStanislav Mekhanoshin nval = B.CreateBitCast(nval, opr0->getType()); 11277f37794eSStanislav Mekhanoshin } 11287f37794eSStanislav Mekhanoshin 1129d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " 11307f37794eSStanislav Mekhanoshin << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); 11317f37794eSStanislav Mekhanoshin replaceCall(nval); 11327f37794eSStanislav Mekhanoshin 11337f37794eSStanislav Mekhanoshin return true; 11347f37794eSStanislav Mekhanoshin } 11357f37794eSStanislav Mekhanoshin 11367f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, 11377f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 11387f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 11397f37794eSStanislav Mekhanoshin Value *opr1 = CI->getArgOperand(1); 11407f37794eSStanislav Mekhanoshin 11417f37794eSStanislav Mekhanoshin ConstantInt *CINT = dyn_cast<ConstantInt>(opr1); 11427f37794eSStanislav Mekhanoshin if (!CINT) { 11437f37794eSStanislav Mekhanoshin return false; 11447f37794eSStanislav Mekhanoshin } 11457f37794eSStanislav Mekhanoshin int ci_opr1 = (int)CINT->getSExtValue(); 11467f37794eSStanislav Mekhanoshin if (ci_opr1 == 1) { // rootn(x, 1) = x 1147d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n"); 11487f37794eSStanislav Mekhanoshin replaceCall(opr0); 11497f37794eSStanislav Mekhanoshin return true; 11507f37794eSStanislav Mekhanoshin } 11517f37794eSStanislav Mekhanoshin if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) 11527f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 115313680223SJames Y Knight if (FunctionCallee FPExpr = 115413680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 1155d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n"); 11567f37794eSStanislav Mekhanoshin Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); 11577f37794eSStanislav Mekhanoshin replaceCall(nval); 11587f37794eSStanislav Mekhanoshin return true; 11597f37794eSStanislav Mekhanoshin } 11607f37794eSStanislav Mekhanoshin } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) 11617f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 116213680223SJames Y Knight if (FunctionCallee FPExpr = 116313680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { 1164d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n"); 11657f37794eSStanislav Mekhanoshin Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); 11667f37794eSStanislav Mekhanoshin replaceCall(nval); 11677f37794eSStanislav Mekhanoshin return true; 11687f37794eSStanislav Mekhanoshin } 11697f37794eSStanislav Mekhanoshin } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x 1170d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n"); 11717f37794eSStanislav Mekhanoshin Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), 11727f37794eSStanislav Mekhanoshin opr0, 11737f37794eSStanislav Mekhanoshin "__rootn2div"); 11747f37794eSStanislav Mekhanoshin replaceCall(nval); 11757f37794eSStanislav Mekhanoshin return true; 11767f37794eSStanislav Mekhanoshin } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x) 11777f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 117813680223SJames Y Knight if (FunctionCallee FPExpr = 117913680223SJames Y Knight getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { 1180d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 1181d34e60caSNicola Zaghen << ")\n"); 11827f37794eSStanislav Mekhanoshin Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); 11837f37794eSStanislav Mekhanoshin replaceCall(nval); 11847f37794eSStanislav Mekhanoshin return true; 11857f37794eSStanislav Mekhanoshin } 11867f37794eSStanislav Mekhanoshin } 11877f37794eSStanislav Mekhanoshin return false; 11887f37794eSStanislav Mekhanoshin } 11897f37794eSStanislav Mekhanoshin 11907f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, 11917f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 11927f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 11937f37794eSStanislav Mekhanoshin Value *opr1 = CI->getArgOperand(1); 11947f37794eSStanislav Mekhanoshin Value *opr2 = CI->getArgOperand(2); 11957f37794eSStanislav Mekhanoshin 11967f37794eSStanislav Mekhanoshin ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); 11977f37794eSStanislav Mekhanoshin ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); 11987f37794eSStanislav Mekhanoshin if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) { 11997f37794eSStanislav Mekhanoshin // fma/mad(a, b, c) = c if a=0 || b=0 1200d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n"); 12017f37794eSStanislav Mekhanoshin replaceCall(opr2); 12027f37794eSStanislav Mekhanoshin return true; 12037f37794eSStanislav Mekhanoshin } 12047f37794eSStanislav Mekhanoshin if (CF0 && CF0->isExactlyValue(1.0f)) { 12057f37794eSStanislav Mekhanoshin // fma/mad(a, b, c) = b+c if a=1 1206d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2 1207d34e60caSNicola Zaghen << "\n"); 12087f37794eSStanislav Mekhanoshin Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd"); 12097f37794eSStanislav Mekhanoshin replaceCall(nval); 12107f37794eSStanislav Mekhanoshin return true; 12117f37794eSStanislav Mekhanoshin } 12127f37794eSStanislav Mekhanoshin if (CF1 && CF1->isExactlyValue(1.0f)) { 12137f37794eSStanislav Mekhanoshin // fma/mad(a, b, c) = a+c if b=1 1214d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2 1215d34e60caSNicola Zaghen << "\n"); 12167f37794eSStanislav Mekhanoshin Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd"); 12177f37794eSStanislav Mekhanoshin replaceCall(nval); 12187f37794eSStanislav Mekhanoshin return true; 12197f37794eSStanislav Mekhanoshin } 12207f37794eSStanislav Mekhanoshin if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) { 12217f37794eSStanislav Mekhanoshin if (CF->isZero()) { 12227f37794eSStanislav Mekhanoshin // fma/mad(a, b, c) = a*b if c=0 1223d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " 1224d34e60caSNicola Zaghen << *opr1 << "\n"); 12257f37794eSStanislav Mekhanoshin Value *nval = B.CreateFMul(opr0, opr1, "fmamul"); 12267f37794eSStanislav Mekhanoshin replaceCall(nval); 12277f37794eSStanislav Mekhanoshin return true; 12287f37794eSStanislav Mekhanoshin } 12297f37794eSStanislav Mekhanoshin } 12307f37794eSStanislav Mekhanoshin 12317f37794eSStanislav Mekhanoshin return false; 12327f37794eSStanislav Mekhanoshin } 12337f37794eSStanislav Mekhanoshin 1234*dc6e8dfdSJacob Lambert // Get a scalar native builtin single argument FP function 123513680223SJames Y Knight FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, 123613680223SJames Y Knight const FuncInfo &FInfo) { 1237312c557bSStanislav Mekhanoshin if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) 1238312c557bSStanislav Mekhanoshin return nullptr; 12397f37794eSStanislav Mekhanoshin FuncInfo nf = FInfo; 12407f37794eSStanislav Mekhanoshin nf.setPrefix(AMDGPULibFunc::NATIVE); 12417f37794eSStanislav Mekhanoshin return getFunction(M, nf); 12427f37794eSStanislav Mekhanoshin } 12437f37794eSStanislav Mekhanoshin 12447f37794eSStanislav Mekhanoshin // fold sqrt -> native_sqrt (x) 12457f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, 12467f37794eSStanislav Mekhanoshin const FuncInfo &FInfo) { 1247312c557bSStanislav Mekhanoshin if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && 12487f37794eSStanislav Mekhanoshin (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { 124913680223SJames Y Knight if (FunctionCallee FPExpr = getNativeFunction( 12507f37794eSStanislav Mekhanoshin CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 12517f37794eSStanislav Mekhanoshin Value *opr0 = CI->getArgOperand(0); 1252d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " 12537f37794eSStanislav Mekhanoshin << "sqrt(" << *opr0 << ")\n"); 12547f37794eSStanislav Mekhanoshin Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); 12557f37794eSStanislav Mekhanoshin replaceCall(nval); 12567f37794eSStanislav Mekhanoshin return true; 12577f37794eSStanislav Mekhanoshin } 12587f37794eSStanislav Mekhanoshin } 12597f37794eSStanislav Mekhanoshin return false; 12607f37794eSStanislav Mekhanoshin } 12617f37794eSStanislav Mekhanoshin 12627f37794eSStanislav Mekhanoshin // fold sin, cos -> sincos. 12637f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, 12647f37794eSStanislav Mekhanoshin AliasAnalysis *AA) { 12657f37794eSStanislav Mekhanoshin AMDGPULibFunc fInfo; 12667f37794eSStanislav Mekhanoshin if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo)) 12677f37794eSStanislav Mekhanoshin return false; 12687f37794eSStanislav Mekhanoshin 12697f37794eSStanislav Mekhanoshin assert(fInfo.getId() == AMDGPULibFunc::EI_SIN || 12707f37794eSStanislav Mekhanoshin fInfo.getId() == AMDGPULibFunc::EI_COS); 12717f37794eSStanislav Mekhanoshin bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; 12727f37794eSStanislav Mekhanoshin 12737f37794eSStanislav Mekhanoshin Value *CArgVal = CI->getArgOperand(0); 12747f37794eSStanislav Mekhanoshin BasicBlock * const CBB = CI->getParent(); 12757f37794eSStanislav Mekhanoshin 12767f37794eSStanislav Mekhanoshin int const MaxScan = 30; 1277c9821cecSStanislav Mekhanoshin bool Changed = false; 12787f37794eSStanislav Mekhanoshin 12797f37794eSStanislav Mekhanoshin { // fold in load value. 12807f37794eSStanislav Mekhanoshin LoadInst *LI = dyn_cast<LoadInst>(CArgVal); 12817f37794eSStanislav Mekhanoshin if (LI && LI->getParent() == CBB) { 12827f37794eSStanislav Mekhanoshin BasicBlock::iterator BBI = LI->getIterator(); 12837f37794eSStanislav Mekhanoshin Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA); 12847f37794eSStanislav Mekhanoshin if (AvailableVal) { 1285c9821cecSStanislav Mekhanoshin Changed = true; 12867f37794eSStanislav Mekhanoshin CArgVal->replaceAllUsesWith(AvailableVal); 12877f37794eSStanislav Mekhanoshin if (CArgVal->getNumUses() == 0) 12887f37794eSStanislav Mekhanoshin LI->eraseFromParent(); 12897f37794eSStanislav Mekhanoshin CArgVal = CI->getArgOperand(0); 12907f37794eSStanislav Mekhanoshin } 12917f37794eSStanislav Mekhanoshin } 12927f37794eSStanislav Mekhanoshin } 12937f37794eSStanislav Mekhanoshin 12947f37794eSStanislav Mekhanoshin Module *M = CI->getModule(); 12957f37794eSStanislav Mekhanoshin fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN); 12967f37794eSStanislav Mekhanoshin std::string const PairName = fInfo.mangle(); 12977f37794eSStanislav Mekhanoshin 12987f37794eSStanislav Mekhanoshin CallInst *UI = nullptr; 12997f37794eSStanislav Mekhanoshin for (User* U : CArgVal->users()) { 13007f37794eSStanislav Mekhanoshin CallInst *XI = dyn_cast_or_null<CallInst>(U); 13017f37794eSStanislav Mekhanoshin if (!XI || XI == CI || XI->getParent() != CBB) 13027f37794eSStanislav Mekhanoshin continue; 13037f37794eSStanislav Mekhanoshin 13047f37794eSStanislav Mekhanoshin Function *UCallee = XI->getCalledFunction(); 13057f37794eSStanislav Mekhanoshin if (!UCallee || !UCallee->getName().equals(PairName)) 13067f37794eSStanislav Mekhanoshin continue; 13077f37794eSStanislav Mekhanoshin 13087f37794eSStanislav Mekhanoshin BasicBlock::iterator BBI = CI->getIterator(); 13097f37794eSStanislav Mekhanoshin if (BBI == CI->getParent()->begin()) 13107f37794eSStanislav Mekhanoshin break; 13117f37794eSStanislav Mekhanoshin --BBI; 13127f37794eSStanislav Mekhanoshin for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) { 13137f37794eSStanislav Mekhanoshin if (cast<Instruction>(BBI) == XI) { 13147f37794eSStanislav Mekhanoshin UI = XI; 13157f37794eSStanislav Mekhanoshin break; 13167f37794eSStanislav Mekhanoshin } 13177f37794eSStanislav Mekhanoshin } 13187f37794eSStanislav Mekhanoshin if (UI) break; 13197f37794eSStanislav Mekhanoshin } 13207f37794eSStanislav Mekhanoshin 1321c9821cecSStanislav Mekhanoshin if (!UI) 1322c9821cecSStanislav Mekhanoshin return Changed; 13237f37794eSStanislav Mekhanoshin 13247f37794eSStanislav Mekhanoshin // Merge the sin and cos. 13257f37794eSStanislav Mekhanoshin 13267f37794eSStanislav Mekhanoshin // for OpenCL 2.0 we have only generic implementation of sincos 13277f37794eSStanislav Mekhanoshin // function. 13287f37794eSStanislav Mekhanoshin AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); 13290da6350dSMatt Arsenault nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); 133013680223SJames Y Knight FunctionCallee Fsincos = getFunction(M, nf); 1331c9821cecSStanislav Mekhanoshin if (!Fsincos) 1332c9821cecSStanislav Mekhanoshin return Changed; 13337f37794eSStanislav Mekhanoshin 13347f37794eSStanislav Mekhanoshin BasicBlock::iterator ItOld = B.GetInsertPoint(); 13357f37794eSStanislav Mekhanoshin AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_"); 13367f37794eSStanislav Mekhanoshin B.SetInsertPoint(UI); 13377f37794eSStanislav Mekhanoshin 13387f37794eSStanislav Mekhanoshin Value *P = Alloc; 133913680223SJames Y Knight Type *PTy = Fsincos.getFunctionType()->getParamType(1); 13407f37794eSStanislav Mekhanoshin // The allocaInst allocates the memory in private address space. This need 13417f37794eSStanislav Mekhanoshin // to be bitcasted to point to the address space of cos pointer type. 13427f37794eSStanislav Mekhanoshin // In OpenCL 2.0 this is generic, while in 1.2 that is private. 13430da6350dSMatt Arsenault if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) 13447f37794eSStanislav Mekhanoshin P = B.CreateAddrSpaceCast(Alloc, PTy); 13457f37794eSStanislav Mekhanoshin CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); 13467f37794eSStanislav Mekhanoshin 1347d34e60caSNicola Zaghen LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with " 1348d34e60caSNicola Zaghen << *Call << "\n"); 13497f37794eSStanislav Mekhanoshin 13507f37794eSStanislav Mekhanoshin if (!isSin) { // CI->cos, UI->sin 13517f37794eSStanislav Mekhanoshin B.SetInsertPoint(&*ItOld); 13527f37794eSStanislav Mekhanoshin UI->replaceAllUsesWith(&*Call); 135314359ef1SJames Y Knight Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); 13547f37794eSStanislav Mekhanoshin CI->replaceAllUsesWith(Reload); 13557f37794eSStanislav Mekhanoshin UI->eraseFromParent(); 13567f37794eSStanislav Mekhanoshin CI->eraseFromParent(); 13577f37794eSStanislav Mekhanoshin } else { // CI->sin, UI->cos 135814359ef1SJames Y Knight Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); 13597f37794eSStanislav Mekhanoshin UI->replaceAllUsesWith(Reload); 13607f37794eSStanislav Mekhanoshin CI->replaceAllUsesWith(Call); 13617f37794eSStanislav Mekhanoshin UI->eraseFromParent(); 13627f37794eSStanislav Mekhanoshin CI->eraseFromParent(); 13637f37794eSStanislav Mekhanoshin } 13647f37794eSStanislav Mekhanoshin return true; 13657f37794eSStanislav Mekhanoshin } 13667f37794eSStanislav Mekhanoshin 1367a9191c84SStanislav Mekhanoshin bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { 1368a9191c84SStanislav Mekhanoshin if (!TM) 1369a9191c84SStanislav Mekhanoshin return false; 1370a9191c84SStanislav Mekhanoshin 1371a9191c84SStanislav Mekhanoshin StringRef CPU = TM->getTargetCPU(); 1372a9191c84SStanislav Mekhanoshin StringRef Features = TM->getTargetFeatureString(); 137342f74e82SMartin Storsjö if ((CPU.empty() || CPU.equals_insensitive("generic")) && 1374a9191c84SStanislav Mekhanoshin (Features.empty() || 137542f74e82SMartin Storsjö Features.find_insensitive("wavefrontsize") == StringRef::npos)) 1376a9191c84SStanislav Mekhanoshin return false; 1377a9191c84SStanislav Mekhanoshin 1378a9191c84SStanislav Mekhanoshin Function *F = CI->getParent()->getParent(); 1379a9191c84SStanislav Mekhanoshin const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F); 1380a9191c84SStanislav Mekhanoshin unsigned N = ST.getWavefrontSize(); 1381a9191c84SStanislav Mekhanoshin 1382a9191c84SStanislav Mekhanoshin LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with " 1383a9191c84SStanislav Mekhanoshin << N << "\n"); 1384a9191c84SStanislav Mekhanoshin 1385a9191c84SStanislav Mekhanoshin CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); 1386a9191c84SStanislav Mekhanoshin CI->eraseFromParent(); 1387a9191c84SStanislav Mekhanoshin return true; 1388a9191c84SStanislav Mekhanoshin } 1389a9191c84SStanislav Mekhanoshin 13907f37794eSStanislav Mekhanoshin // Get insertion point at entry. 13917f37794eSStanislav Mekhanoshin BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) { 13927f37794eSStanislav Mekhanoshin Function * Func = UI->getParent()->getParent(); 13937f37794eSStanislav Mekhanoshin BasicBlock * BB = &Func->getEntryBlock(); 13947f37794eSStanislav Mekhanoshin assert(BB && "Entry block not found!"); 13957f37794eSStanislav Mekhanoshin BasicBlock::iterator ItNew = BB->begin(); 13967f37794eSStanislav Mekhanoshin return ItNew; 13977f37794eSStanislav Mekhanoshin } 13987f37794eSStanislav Mekhanoshin 13997f37794eSStanislav Mekhanoshin // Insert a AllocsInst at the beginning of function entry block. 14007f37794eSStanislav Mekhanoshin AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, 14017f37794eSStanislav Mekhanoshin const char *prefix) { 14027f37794eSStanislav Mekhanoshin BasicBlock::iterator ItNew = getEntryIns(UI); 14037f37794eSStanislav Mekhanoshin Function *UCallee = UI->getCalledFunction(); 14047f37794eSStanislav Mekhanoshin Type *RetType = UCallee->getReturnType(); 14057f37794eSStanislav Mekhanoshin B.SetInsertPoint(&*ItNew); 14067f37794eSStanislav Mekhanoshin AllocaInst *Alloc = B.CreateAlloca(RetType, 0, 14077f37794eSStanislav Mekhanoshin std::string(prefix) + UI->getName()); 14084f04db4bSEli Friedman Alloc->setAlignment( 14094f04db4bSEli Friedman Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); 14107f37794eSStanislav Mekhanoshin return Alloc; 14117f37794eSStanislav Mekhanoshin } 14127f37794eSStanislav Mekhanoshin 14137f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo, 14147f37794eSStanislav Mekhanoshin double& Res0, double& Res1, 14157f37794eSStanislav Mekhanoshin Constant *copr0, Constant *copr1, 14167f37794eSStanislav Mekhanoshin Constant *copr2) { 14177f37794eSStanislav Mekhanoshin // By default, opr0/opr1/opr3 holds values of float/double type. 14187f37794eSStanislav Mekhanoshin // If they are not float/double, each function has to its 14197f37794eSStanislav Mekhanoshin // operand separately. 14207f37794eSStanislav Mekhanoshin double opr0=0.0, opr1=0.0, opr2=0.0; 14217f37794eSStanislav Mekhanoshin ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); 14227f37794eSStanislav Mekhanoshin ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); 14237f37794eSStanislav Mekhanoshin ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2); 14247f37794eSStanislav Mekhanoshin if (fpopr0) { 14257f37794eSStanislav Mekhanoshin opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) 14267f37794eSStanislav Mekhanoshin ? fpopr0->getValueAPF().convertToDouble() 14277f37794eSStanislav Mekhanoshin : (double)fpopr0->getValueAPF().convertToFloat(); 14287f37794eSStanislav Mekhanoshin } 14297f37794eSStanislav Mekhanoshin 14307f37794eSStanislav Mekhanoshin if (fpopr1) { 14317f37794eSStanislav Mekhanoshin opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) 14327f37794eSStanislav Mekhanoshin ? fpopr1->getValueAPF().convertToDouble() 14337f37794eSStanislav Mekhanoshin : (double)fpopr1->getValueAPF().convertToFloat(); 14347f37794eSStanislav Mekhanoshin } 14357f37794eSStanislav Mekhanoshin 14367f37794eSStanislav Mekhanoshin if (fpopr2) { 14377f37794eSStanislav Mekhanoshin opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64) 14387f37794eSStanislav Mekhanoshin ? fpopr2->getValueAPF().convertToDouble() 14397f37794eSStanislav Mekhanoshin : (double)fpopr2->getValueAPF().convertToFloat(); 14407f37794eSStanislav Mekhanoshin } 14417f37794eSStanislav Mekhanoshin 14427f37794eSStanislav Mekhanoshin switch (FInfo.getId()) { 14437f37794eSStanislav Mekhanoshin default : return false; 14447f37794eSStanislav Mekhanoshin 14457f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOS: 14467f37794eSStanislav Mekhanoshin Res0 = acos(opr0); 14477f37794eSStanislav Mekhanoshin return true; 14487f37794eSStanislav Mekhanoshin 14497f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOSH: 14507f37794eSStanislav Mekhanoshin // acosh(x) == log(x + sqrt(x*x - 1)) 14517f37794eSStanislav Mekhanoshin Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); 14527f37794eSStanislav Mekhanoshin return true; 14537f37794eSStanislav Mekhanoshin 14547f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ACOSPI: 14557f37794eSStanislav Mekhanoshin Res0 = acos(opr0) / MATH_PI; 14567f37794eSStanislav Mekhanoshin return true; 14577f37794eSStanislav Mekhanoshin 14587f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASIN: 14597f37794eSStanislav Mekhanoshin Res0 = asin(opr0); 14607f37794eSStanislav Mekhanoshin return true; 14617f37794eSStanislav Mekhanoshin 14627f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASINH: 14637f37794eSStanislav Mekhanoshin // asinh(x) == log(x + sqrt(x*x + 1)) 14647f37794eSStanislav Mekhanoshin Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); 14657f37794eSStanislav Mekhanoshin return true; 14667f37794eSStanislav Mekhanoshin 14677f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ASINPI: 14687f37794eSStanislav Mekhanoshin Res0 = asin(opr0) / MATH_PI; 14697f37794eSStanislav Mekhanoshin return true; 14707f37794eSStanislav Mekhanoshin 14717f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATAN: 14727f37794eSStanislav Mekhanoshin Res0 = atan(opr0); 14737f37794eSStanislav Mekhanoshin return true; 14747f37794eSStanislav Mekhanoshin 14757f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATANH: 14767f37794eSStanislav Mekhanoshin // atanh(x) == (log(x+1) - log(x-1))/2; 14777f37794eSStanislav Mekhanoshin Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; 14787f37794eSStanislav Mekhanoshin return true; 14797f37794eSStanislav Mekhanoshin 14807f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ATANPI: 14817f37794eSStanislav Mekhanoshin Res0 = atan(opr0) / MATH_PI; 14827f37794eSStanislav Mekhanoshin return true; 14837f37794eSStanislav Mekhanoshin 14847f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_CBRT: 14857f37794eSStanislav Mekhanoshin Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); 14867f37794eSStanislav Mekhanoshin return true; 14877f37794eSStanislav Mekhanoshin 14887f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COS: 14897f37794eSStanislav Mekhanoshin Res0 = cos(opr0); 14907f37794eSStanislav Mekhanoshin return true; 14917f37794eSStanislav Mekhanoshin 14927f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COSH: 14937f37794eSStanislav Mekhanoshin Res0 = cosh(opr0); 14947f37794eSStanislav Mekhanoshin return true; 14957f37794eSStanislav Mekhanoshin 14967f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_COSPI: 14977f37794eSStanislav Mekhanoshin Res0 = cos(MATH_PI * opr0); 14987f37794eSStanislav Mekhanoshin return true; 14997f37794eSStanislav Mekhanoshin 15007f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP: 15017f37794eSStanislav Mekhanoshin Res0 = exp(opr0); 15027f37794eSStanislav Mekhanoshin return true; 15037f37794eSStanislav Mekhanoshin 15047f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP2: 15057f37794eSStanislav Mekhanoshin Res0 = pow(2.0, opr0); 15067f37794eSStanislav Mekhanoshin return true; 15077f37794eSStanislav Mekhanoshin 15087f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXP10: 15097f37794eSStanislav Mekhanoshin Res0 = pow(10.0, opr0); 15107f37794eSStanislav Mekhanoshin return true; 15117f37794eSStanislav Mekhanoshin 15127f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_EXPM1: 15137f37794eSStanislav Mekhanoshin Res0 = exp(opr0) - 1.0; 15147f37794eSStanislav Mekhanoshin return true; 15157f37794eSStanislav Mekhanoshin 15167f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG: 15177f37794eSStanislav Mekhanoshin Res0 = log(opr0); 15187f37794eSStanislav Mekhanoshin return true; 15197f37794eSStanislav Mekhanoshin 15207f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG2: 15217f37794eSStanislav Mekhanoshin Res0 = log(opr0) / log(2.0); 15227f37794eSStanislav Mekhanoshin return true; 15237f37794eSStanislav Mekhanoshin 15247f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_LOG10: 15257f37794eSStanislav Mekhanoshin Res0 = log(opr0) / log(10.0); 15267f37794eSStanislav Mekhanoshin return true; 15277f37794eSStanislav Mekhanoshin 15287f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RSQRT: 15297f37794eSStanislav Mekhanoshin Res0 = 1.0 / sqrt(opr0); 15307f37794eSStanislav Mekhanoshin return true; 15317f37794eSStanislav Mekhanoshin 15327f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SIN: 15337f37794eSStanislav Mekhanoshin Res0 = sin(opr0); 15347f37794eSStanislav Mekhanoshin return true; 15357f37794eSStanislav Mekhanoshin 15367f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINH: 15377f37794eSStanislav Mekhanoshin Res0 = sinh(opr0); 15387f37794eSStanislav Mekhanoshin return true; 15397f37794eSStanislav Mekhanoshin 15407f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINPI: 15417f37794eSStanislav Mekhanoshin Res0 = sin(MATH_PI * opr0); 15427f37794eSStanislav Mekhanoshin return true; 15437f37794eSStanislav Mekhanoshin 15447f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SQRT: 15457f37794eSStanislav Mekhanoshin Res0 = sqrt(opr0); 15467f37794eSStanislav Mekhanoshin return true; 15477f37794eSStanislav Mekhanoshin 15487f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TAN: 15497f37794eSStanislav Mekhanoshin Res0 = tan(opr0); 15507f37794eSStanislav Mekhanoshin return true; 15517f37794eSStanislav Mekhanoshin 15527f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TANH: 15537f37794eSStanislav Mekhanoshin Res0 = tanh(opr0); 15547f37794eSStanislav Mekhanoshin return true; 15557f37794eSStanislav Mekhanoshin 15567f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_TANPI: 15577f37794eSStanislav Mekhanoshin Res0 = tan(MATH_PI * opr0); 15587f37794eSStanislav Mekhanoshin return true; 15597f37794eSStanislav Mekhanoshin 15607f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_RECIP: 15617f37794eSStanislav Mekhanoshin Res0 = 1.0 / opr0; 15627f37794eSStanislav Mekhanoshin return true; 15637f37794eSStanislav Mekhanoshin 15647f37794eSStanislav Mekhanoshin // two-arg functions 15657f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_DIVIDE: 15667f37794eSStanislav Mekhanoshin Res0 = opr0 / opr1; 15677f37794eSStanislav Mekhanoshin return true; 15687f37794eSStanislav Mekhanoshin 15697f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POW: 15707f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POWR: 15717f37794eSStanislav Mekhanoshin Res0 = pow(opr0, opr1); 15727f37794eSStanislav Mekhanoshin return true; 15737f37794eSStanislav Mekhanoshin 15747f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_POWN: { 15757f37794eSStanislav Mekhanoshin if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 15767f37794eSStanislav Mekhanoshin double val = (double)iopr1->getSExtValue(); 15777f37794eSStanislav Mekhanoshin Res0 = pow(opr0, val); 15787f37794eSStanislav Mekhanoshin return true; 15797f37794eSStanislav Mekhanoshin } 15807f37794eSStanislav Mekhanoshin return false; 15817f37794eSStanislav Mekhanoshin } 15827f37794eSStanislav Mekhanoshin 15837f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_ROOTN: { 15847f37794eSStanislav Mekhanoshin if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 15857f37794eSStanislav Mekhanoshin double val = (double)iopr1->getSExtValue(); 15867f37794eSStanislav Mekhanoshin Res0 = pow(opr0, 1.0 / val); 15877f37794eSStanislav Mekhanoshin return true; 15887f37794eSStanislav Mekhanoshin } 15897f37794eSStanislav Mekhanoshin return false; 15907f37794eSStanislav Mekhanoshin } 15917f37794eSStanislav Mekhanoshin 15927f37794eSStanislav Mekhanoshin // with ptr arg 15937f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_SINCOS: 15947f37794eSStanislav Mekhanoshin Res0 = sin(opr0); 15957f37794eSStanislav Mekhanoshin Res1 = cos(opr0); 15967f37794eSStanislav Mekhanoshin return true; 15977f37794eSStanislav Mekhanoshin 15987f37794eSStanislav Mekhanoshin // three-arg functions 15997f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_FMA: 16007f37794eSStanislav Mekhanoshin case AMDGPULibFunc::EI_MAD: 16017f37794eSStanislav Mekhanoshin Res0 = opr0 * opr1 + opr2; 16027f37794eSStanislav Mekhanoshin return true; 16037f37794eSStanislav Mekhanoshin } 16047f37794eSStanislav Mekhanoshin 16057f37794eSStanislav Mekhanoshin return false; 16067f37794eSStanislav Mekhanoshin } 16077f37794eSStanislav Mekhanoshin 16087f37794eSStanislav Mekhanoshin bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) { 16097f37794eSStanislav Mekhanoshin int numArgs = (int)aCI->getNumArgOperands(); 16107f37794eSStanislav Mekhanoshin if (numArgs > 3) 16117f37794eSStanislav Mekhanoshin return false; 16127f37794eSStanislav Mekhanoshin 16137f37794eSStanislav Mekhanoshin Constant *copr0 = nullptr; 16147f37794eSStanislav Mekhanoshin Constant *copr1 = nullptr; 16157f37794eSStanislav Mekhanoshin Constant *copr2 = nullptr; 16167f37794eSStanislav Mekhanoshin if (numArgs > 0) { 16177f37794eSStanislav Mekhanoshin if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) 16187f37794eSStanislav Mekhanoshin return false; 16197f37794eSStanislav Mekhanoshin } 16207f37794eSStanislav Mekhanoshin 16217f37794eSStanislav Mekhanoshin if (numArgs > 1) { 16227f37794eSStanislav Mekhanoshin if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { 16237f37794eSStanislav Mekhanoshin if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) 16247f37794eSStanislav Mekhanoshin return false; 16257f37794eSStanislav Mekhanoshin } 16267f37794eSStanislav Mekhanoshin } 16277f37794eSStanislav Mekhanoshin 16287f37794eSStanislav Mekhanoshin if (numArgs > 2) { 16297f37794eSStanislav Mekhanoshin if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr) 16307f37794eSStanislav Mekhanoshin return false; 16317f37794eSStanislav Mekhanoshin } 16327f37794eSStanislav Mekhanoshin 16337f37794eSStanislav Mekhanoshin // At this point, all arguments to aCI are constants. 16347f37794eSStanislav Mekhanoshin 16357f37794eSStanislav Mekhanoshin // max vector size is 16, and sincos will generate two results. 16367f37794eSStanislav Mekhanoshin double DVal0[16], DVal1[16]; 16377f37794eSStanislav Mekhanoshin bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); 16387f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) == 1) { 16397f37794eSStanislav Mekhanoshin if (!evaluateScalarMathFunc(FInfo, DVal0[0], 16407f37794eSStanislav Mekhanoshin DVal1[0], copr0, copr1, copr2)) { 16417f37794eSStanislav Mekhanoshin return false; 16427f37794eSStanislav Mekhanoshin } 16437f37794eSStanislav Mekhanoshin } else { 16447f37794eSStanislav Mekhanoshin ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); 16457f37794eSStanislav Mekhanoshin ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); 16467f37794eSStanislav Mekhanoshin ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2); 16477f37794eSStanislav Mekhanoshin for (int i=0; i < getVecSize(FInfo); ++i) { 16487f37794eSStanislav Mekhanoshin Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; 16497f37794eSStanislav Mekhanoshin Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; 16507f37794eSStanislav Mekhanoshin Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr; 16517f37794eSStanislav Mekhanoshin if (!evaluateScalarMathFunc(FInfo, DVal0[i], 16527f37794eSStanislav Mekhanoshin DVal1[i], celt0, celt1, celt2)) { 16537f37794eSStanislav Mekhanoshin return false; 16547f37794eSStanislav Mekhanoshin } 16557f37794eSStanislav Mekhanoshin } 16567f37794eSStanislav Mekhanoshin } 16577f37794eSStanislav Mekhanoshin 16587f37794eSStanislav Mekhanoshin LLVMContext &context = CI->getParent()->getParent()->getContext(); 16597f37794eSStanislav Mekhanoshin Constant *nval0, *nval1; 16607f37794eSStanislav Mekhanoshin if (getVecSize(FInfo) == 1) { 16617f37794eSStanislav Mekhanoshin nval0 = ConstantFP::get(CI->getType(), DVal0[0]); 16627f37794eSStanislav Mekhanoshin if (hasTwoResults) 16637f37794eSStanislav Mekhanoshin nval1 = ConstantFP::get(CI->getType(), DVal1[0]); 16647f37794eSStanislav Mekhanoshin } else { 16657f37794eSStanislav Mekhanoshin if (getArgType(FInfo) == AMDGPULibFunc::F32) { 16667f37794eSStanislav Mekhanoshin SmallVector <float, 0> FVal0, FVal1; 16677f37794eSStanislav Mekhanoshin for (int i=0; i < getVecSize(FInfo); ++i) 16687f37794eSStanislav Mekhanoshin FVal0.push_back((float)DVal0[i]); 16697f37794eSStanislav Mekhanoshin ArrayRef<float> tmp0(FVal0); 16707f37794eSStanislav Mekhanoshin nval0 = ConstantDataVector::get(context, tmp0); 16717f37794eSStanislav Mekhanoshin if (hasTwoResults) { 16727f37794eSStanislav Mekhanoshin for (int i=0; i < getVecSize(FInfo); ++i) 16737f37794eSStanislav Mekhanoshin FVal1.push_back((float)DVal1[i]); 16747f37794eSStanislav Mekhanoshin ArrayRef<float> tmp1(FVal1); 16757f37794eSStanislav Mekhanoshin nval1 = ConstantDataVector::get(context, tmp1); 16767f37794eSStanislav Mekhanoshin } 16777f37794eSStanislav Mekhanoshin } else { 16787f37794eSStanislav Mekhanoshin ArrayRef<double> tmp0(DVal0); 16797f37794eSStanislav Mekhanoshin nval0 = ConstantDataVector::get(context, tmp0); 16807f37794eSStanislav Mekhanoshin if (hasTwoResults) { 16817f37794eSStanislav Mekhanoshin ArrayRef<double> tmp1(DVal1); 16827f37794eSStanislav Mekhanoshin nval1 = ConstantDataVector::get(context, tmp1); 16837f37794eSStanislav Mekhanoshin } 16847f37794eSStanislav Mekhanoshin } 16857f37794eSStanislav Mekhanoshin } 16867f37794eSStanislav Mekhanoshin 16877f37794eSStanislav Mekhanoshin if (hasTwoResults) { 16887f37794eSStanislav Mekhanoshin // sincos 16897f37794eSStanislav Mekhanoshin assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS && 16907f37794eSStanislav Mekhanoshin "math function with ptr arg not supported yet"); 16917f37794eSStanislav Mekhanoshin new StoreInst(nval1, aCI->getArgOperand(1), aCI); 16927f37794eSStanislav Mekhanoshin } 16937f37794eSStanislav Mekhanoshin 16947f37794eSStanislav Mekhanoshin replaceCall(nval0); 16957f37794eSStanislav Mekhanoshin return true; 16967f37794eSStanislav Mekhanoshin } 16977f37794eSStanislav Mekhanoshin 16987f37794eSStanislav Mekhanoshin // Public interface to the Simplify LibCalls pass. 1699348735b7SMatt Arsenault FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) { 1700348735b7SMatt Arsenault return new AMDGPUSimplifyLibCalls(TM); 17017f37794eSStanislav Mekhanoshin } 17027f37794eSStanislav Mekhanoshin 17037f37794eSStanislav Mekhanoshin FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { 17047f37794eSStanislav Mekhanoshin return new AMDGPUUseNativeCalls(); 17057f37794eSStanislav Mekhanoshin } 17067f37794eSStanislav Mekhanoshin 17077f37794eSStanislav Mekhanoshin bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { 17087f37794eSStanislav Mekhanoshin if (skipFunction(F)) 17097f37794eSStanislav Mekhanoshin return false; 17107f37794eSStanislav Mekhanoshin 17117f37794eSStanislav Mekhanoshin bool Changed = false; 17127f37794eSStanislav Mekhanoshin auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 17137f37794eSStanislav Mekhanoshin 1714d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "AMDIC: process function "; 1715d34e60caSNicola Zaghen F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 17167f37794eSStanislav Mekhanoshin 17177f37794eSStanislav Mekhanoshin for (auto &BB : F) { 17187f37794eSStanislav Mekhanoshin for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { 17197f37794eSStanislav Mekhanoshin // Ignore non-calls. 17207f37794eSStanislav Mekhanoshin CallInst *CI = dyn_cast<CallInst>(I); 17217f37794eSStanislav Mekhanoshin ++I; 17226a31a9a5Sdfukalov // Ignore intrinsics that do not become real instructions. 17236a31a9a5Sdfukalov if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) 17246a31a9a5Sdfukalov continue; 17257f37794eSStanislav Mekhanoshin 17267f37794eSStanislav Mekhanoshin // Ignore indirect calls. 17277f37794eSStanislav Mekhanoshin Function *Callee = CI->getCalledFunction(); 17287f37794eSStanislav Mekhanoshin if (Callee == 0) continue; 17297f37794eSStanislav Mekhanoshin 1730d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; 17317f37794eSStanislav Mekhanoshin dbgs().flush()); 17327f37794eSStanislav Mekhanoshin if(Simplifier.fold(CI, AA)) 17337f37794eSStanislav Mekhanoshin Changed = true; 17347f37794eSStanislav Mekhanoshin } 17357f37794eSStanislav Mekhanoshin } 17367f37794eSStanislav Mekhanoshin return Changed; 17377f37794eSStanislav Mekhanoshin } 17387f37794eSStanislav Mekhanoshin 17399abc4577SArthur Eubanks PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, 17409abc4577SArthur Eubanks FunctionAnalysisManager &AM) { 17418e293fe6SArthur Eubanks AMDGPULibCalls Simplifier(&TM); 17429abc4577SArthur Eubanks Simplifier.initNativeFuncs(); 17439abc4577SArthur Eubanks 17449abc4577SArthur Eubanks bool Changed = false; 17459abc4577SArthur Eubanks auto AA = &AM.getResult<AAManager>(F); 17469abc4577SArthur Eubanks 17479abc4577SArthur Eubanks LLVM_DEBUG(dbgs() << "AMDIC: process function "; 17489abc4577SArthur Eubanks F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 17499abc4577SArthur Eubanks 17509abc4577SArthur Eubanks for (auto &BB : F) { 17519abc4577SArthur Eubanks for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 17529abc4577SArthur Eubanks // Ignore non-calls. 17539abc4577SArthur Eubanks CallInst *CI = dyn_cast<CallInst>(I); 17549abc4577SArthur Eubanks ++I; 17559abc4577SArthur Eubanks // Ignore intrinsics that do not become real instructions. 17569abc4577SArthur Eubanks if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) 17579abc4577SArthur Eubanks continue; 17589abc4577SArthur Eubanks 17599abc4577SArthur Eubanks // Ignore indirect calls. 17609abc4577SArthur Eubanks Function *Callee = CI->getCalledFunction(); 17619abc4577SArthur Eubanks if (Callee == 0) 17629abc4577SArthur Eubanks continue; 17639abc4577SArthur Eubanks 17649abc4577SArthur Eubanks LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; 17659abc4577SArthur Eubanks dbgs().flush()); 17669abc4577SArthur Eubanks if (Simplifier.fold(CI, AA)) 17679abc4577SArthur Eubanks Changed = true; 17689abc4577SArthur Eubanks } 17699abc4577SArthur Eubanks } 17709abc4577SArthur Eubanks return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 17719abc4577SArthur Eubanks } 17729abc4577SArthur Eubanks 17737f37794eSStanislav Mekhanoshin bool AMDGPUUseNativeCalls::runOnFunction(Function &F) { 17747f37794eSStanislav Mekhanoshin if (skipFunction(F) || UseNative.empty()) 17757f37794eSStanislav Mekhanoshin return false; 17767f37794eSStanislav Mekhanoshin 17777f37794eSStanislav Mekhanoshin bool Changed = false; 17787f37794eSStanislav Mekhanoshin for (auto &BB : F) { 17797f37794eSStanislav Mekhanoshin for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { 17807f37794eSStanislav Mekhanoshin // Ignore non-calls. 17817f37794eSStanislav Mekhanoshin CallInst *CI = dyn_cast<CallInst>(I); 17827f37794eSStanislav Mekhanoshin ++I; 17837f37794eSStanislav Mekhanoshin if (!CI) continue; 17847f37794eSStanislav Mekhanoshin 17857f37794eSStanislav Mekhanoshin // Ignore indirect calls. 17867f37794eSStanislav Mekhanoshin Function *Callee = CI->getCalledFunction(); 17877f37794eSStanislav Mekhanoshin if (Callee == 0) continue; 17887f37794eSStanislav Mekhanoshin 17897f37794eSStanislav Mekhanoshin if(Simplifier.useNative(CI)) 17907f37794eSStanislav Mekhanoshin Changed = true; 17917f37794eSStanislav Mekhanoshin } 17927f37794eSStanislav Mekhanoshin } 17937f37794eSStanislav Mekhanoshin return Changed; 17947f37794eSStanislav Mekhanoshin } 17959abc4577SArthur Eubanks 17969abc4577SArthur Eubanks PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, 17979abc4577SArthur Eubanks FunctionAnalysisManager &AM) { 17989abc4577SArthur Eubanks if (UseNative.empty()) 17999abc4577SArthur Eubanks return PreservedAnalyses::all(); 18009abc4577SArthur Eubanks 18019abc4577SArthur Eubanks AMDGPULibCalls Simplifier; 18029abc4577SArthur Eubanks Simplifier.initNativeFuncs(); 18039abc4577SArthur Eubanks 18049abc4577SArthur Eubanks bool Changed = false; 18059abc4577SArthur Eubanks for (auto &BB : F) { 18069abc4577SArthur Eubanks for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 18079abc4577SArthur Eubanks // Ignore non-calls. 18089abc4577SArthur Eubanks CallInst *CI = dyn_cast<CallInst>(I); 18099abc4577SArthur Eubanks ++I; 18109abc4577SArthur Eubanks if (!CI) 18119abc4577SArthur Eubanks continue; 18129abc4577SArthur Eubanks 18139abc4577SArthur Eubanks // Ignore indirect calls. 18149abc4577SArthur Eubanks Function *Callee = CI->getCalledFunction(); 18159abc4577SArthur Eubanks if (Callee == 0) 18169abc4577SArthur Eubanks continue; 18179abc4577SArthur Eubanks 18189abc4577SArthur Eubanks if (Simplifier.useNative(CI)) 18199abc4577SArthur Eubanks Changed = true; 18209abc4577SArthur Eubanks } 18219abc4577SArthur Eubanks } 18229abc4577SArthur Eubanks return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 18239abc4577SArthur Eubanks } 1824