12cab237bSDimitry Andric //===- AMDGPULibCalls.cpp -------------------------------------------------===//
22cab237bSDimitry Andric //
32cab237bSDimitry Andric // The LLVM Compiler Infrastructure
42cab237bSDimitry Andric //
52cab237bSDimitry Andric // This file is distributed under the University of Illinois Open Source
62cab237bSDimitry Andric // License. See LICENSE.TXT for details.
72cab237bSDimitry Andric //
82cab237bSDimitry Andric //===----------------------------------------------------------------------===//
92cab237bSDimitry Andric //
102cab237bSDimitry Andric /// \file
114ba319b5SDimitry Andric /// This file does AMD library function optimizations.
122cab237bSDimitry Andric //
132cab237bSDimitry Andric //===----------------------------------------------------------------------===//
142cab237bSDimitry Andric
152cab237bSDimitry Andric #define DEBUG_TYPE "amdgpu-simplifylib"
162cab237bSDimitry Andric
172cab237bSDimitry Andric #include "AMDGPU.h"
182cab237bSDimitry Andric #include "AMDGPULibFunc.h"
192cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
202cab237bSDimitry Andric #include "llvm/Analysis/Loads.h"
212cab237bSDimitry Andric #include "llvm/ADT/StringSet.h"
222cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
232cab237bSDimitry Andric #include "llvm/IR/Constants.h"
242cab237bSDimitry Andric #include "llvm/IR/DerivedTypes.h"
252cab237bSDimitry Andric #include "llvm/IR/Instructions.h"
262cab237bSDimitry Andric #include "llvm/IR/IRBuilder.h"
272cab237bSDimitry Andric #include "llvm/IR/Function.h"
282cab237bSDimitry Andric #include "llvm/IR/LLVMContext.h"
292cab237bSDimitry Andric #include "llvm/IR/Module.h"
302cab237bSDimitry Andric #include "llvm/IR/ValueSymbolTable.h"
312cab237bSDimitry Andric #include "llvm/Support/Debug.h"
322cab237bSDimitry Andric #include "llvm/Support/raw_ostream.h"
332cab237bSDimitry Andric #include "llvm/Target/TargetOptions.h"
342cab237bSDimitry Andric #include <vector>
352cab237bSDimitry Andric #include <cmath>
362cab237bSDimitry Andric
372cab237bSDimitry Andric using namespace llvm;
382cab237bSDimitry Andric
392cab237bSDimitry Andric static cl::opt<bool> EnablePreLink("amdgpu-prelink",
402cab237bSDimitry Andric cl::desc("Enable pre-link mode optimizations"),
412cab237bSDimitry Andric cl::init(false),
422cab237bSDimitry Andric cl::Hidden);
432cab237bSDimitry Andric
442cab237bSDimitry Andric static cl::list<std::string> UseNative("amdgpu-use-native",
452cab237bSDimitry Andric cl::desc("Comma separated list of functions to replace with native, or all"),
462cab237bSDimitry Andric cl::CommaSeparated, cl::ValueOptional,
472cab237bSDimitry Andric cl::Hidden);
482cab237bSDimitry Andric
// Basic constants: pi, e and sqrt(2).
#define MATH_PI       3.14159265358979323846264338327950288419716939937511
#define MATH_E        2.71828182845904523536028747135266249775724709369996
#define MATH_SQRT2    1.41421356237309504880168872420969807856967187537695

// Value of log2(e)
#define MATH_LOG2E    1.4426950408889634073599246810018921374266459541529859
// Value of log10(e)
#define MATH_LOG10E   0.4342944819032518276511289189166050822943970058036665
// Value of log2(10)
#define MATH_LOG2_10  3.3219280948873623478703194294893901758648313930245806
// Value of 1 / log2(10)
#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085
// Value of 1 / M_LOG2E_F = 1 / log2(e)
#define MATH_RLOG2_E  0.6931471805599453094172321214581765680755001343602552
612cab237bSDimitry Andric
622cab237bSDimitry Andric namespace llvm {
632cab237bSDimitry Andric
642cab237bSDimitry Andric class AMDGPULibCalls {
652cab237bSDimitry Andric private:
662cab237bSDimitry Andric
672cab237bSDimitry Andric typedef llvm::AMDGPULibFunc FuncInfo;
682cab237bSDimitry Andric
692cab237bSDimitry Andric // -fuse-native.
702cab237bSDimitry Andric bool AllNative = false;
712cab237bSDimitry Andric
722cab237bSDimitry Andric bool useNativeFunc(const StringRef F) const;
732cab237bSDimitry Andric
742cab237bSDimitry Andric // Return a pointer (pointer expr) to the function if function defintion with
752cab237bSDimitry Andric // "FuncName" exists. It may create a new function prototype in pre-link mode.
762cab237bSDimitry Andric Constant *getFunction(Module *M, const FuncInfo& fInfo);
772cab237bSDimitry Andric
782cab237bSDimitry Andric // Replace a normal function with its native version.
792cab237bSDimitry Andric bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
802cab237bSDimitry Andric
812cab237bSDimitry Andric bool parseFunctionName(const StringRef& FMangledName,
822cab237bSDimitry Andric FuncInfo *FInfo=nullptr /*out*/);
832cab237bSDimitry Andric
842cab237bSDimitry Andric bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
852cab237bSDimitry Andric
862cab237bSDimitry Andric /* Specialized optimizations */
872cab237bSDimitry Andric
882cab237bSDimitry Andric // recip (half or native)
892cab237bSDimitry Andric bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
902cab237bSDimitry Andric
912cab237bSDimitry Andric // divide (half or native)
922cab237bSDimitry Andric bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
932cab237bSDimitry Andric
942cab237bSDimitry Andric // pow/powr/pown
952cab237bSDimitry Andric bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
962cab237bSDimitry Andric
972cab237bSDimitry Andric // rootn
982cab237bSDimitry Andric bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
992cab237bSDimitry Andric
1002cab237bSDimitry Andric // fma/mad
1012cab237bSDimitry Andric bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1022cab237bSDimitry Andric
1032cab237bSDimitry Andric // -fuse-native for sincos
1042cab237bSDimitry Andric bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
1052cab237bSDimitry Andric
1062cab237bSDimitry Andric // evaluate calls if calls' arguments are constants.
1072cab237bSDimitry Andric bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
1082cab237bSDimitry Andric double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
1092cab237bSDimitry Andric bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
1102cab237bSDimitry Andric
1112cab237bSDimitry Andric // exp
1122cab237bSDimitry Andric bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1132cab237bSDimitry Andric
1142cab237bSDimitry Andric // exp2
1152cab237bSDimitry Andric bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1162cab237bSDimitry Andric
1172cab237bSDimitry Andric // exp10
1182cab237bSDimitry Andric bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1192cab237bSDimitry Andric
1202cab237bSDimitry Andric // log
1212cab237bSDimitry Andric bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1222cab237bSDimitry Andric
1232cab237bSDimitry Andric // log2
1242cab237bSDimitry Andric bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1252cab237bSDimitry Andric
1262cab237bSDimitry Andric // log10
1272cab237bSDimitry Andric bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1282cab237bSDimitry Andric
1292cab237bSDimitry Andric // sqrt
1302cab237bSDimitry Andric bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
1312cab237bSDimitry Andric
1322cab237bSDimitry Andric // sin/cos
1332cab237bSDimitry Andric bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
1342cab237bSDimitry Andric
1352cab237bSDimitry Andric // __read_pipe/__write_pipe
1362cab237bSDimitry Andric bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
1372cab237bSDimitry Andric
1382cab237bSDimitry Andric // Get insertion point at entry.
1392cab237bSDimitry Andric BasicBlock::iterator getEntryIns(CallInst * UI);
1402cab237bSDimitry Andric // Insert an Alloc instruction.
1412cab237bSDimitry Andric AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
1422cab237bSDimitry Andric // Get a scalar native builtin signle argument FP function
1432cab237bSDimitry Andric Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
1442cab237bSDimitry Andric
1452cab237bSDimitry Andric protected:
1462cab237bSDimitry Andric CallInst *CI;
1472cab237bSDimitry Andric
1482cab237bSDimitry Andric bool isUnsafeMath(const CallInst *CI) const;
1492cab237bSDimitry Andric
replaceCall(Value * With)1502cab237bSDimitry Andric void replaceCall(Value *With) {
1512cab237bSDimitry Andric CI->replaceAllUsesWith(With);
1522cab237bSDimitry Andric CI->eraseFromParent();
1532cab237bSDimitry Andric }
1542cab237bSDimitry Andric
1552cab237bSDimitry Andric public:
1562cab237bSDimitry Andric bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
1572cab237bSDimitry Andric
1582cab237bSDimitry Andric void initNativeFuncs();
1592cab237bSDimitry Andric
1602cab237bSDimitry Andric // Replace a normal math function call with that native version
1612cab237bSDimitry Andric bool useNative(CallInst *CI);
1622cab237bSDimitry Andric };
1632cab237bSDimitry Andric
1642cab237bSDimitry Andric } // end llvm namespace
1652cab237bSDimitry Andric
1662cab237bSDimitry Andric namespace {
1672cab237bSDimitry Andric
1682cab237bSDimitry Andric class AMDGPUSimplifyLibCalls : public FunctionPass {
1692cab237bSDimitry Andric
1702cab237bSDimitry Andric AMDGPULibCalls Simplifier;
1712cab237bSDimitry Andric
1722cab237bSDimitry Andric const TargetOptions Options;
1732cab237bSDimitry Andric
1742cab237bSDimitry Andric public:
1752cab237bSDimitry Andric static char ID; // Pass identification
1762cab237bSDimitry Andric
AMDGPUSimplifyLibCalls(const TargetOptions & Opt=TargetOptions ())1772cab237bSDimitry Andric AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
1782cab237bSDimitry Andric : FunctionPass(ID), Options(Opt) {
1792cab237bSDimitry Andric initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
1802cab237bSDimitry Andric }
1812cab237bSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const1822cab237bSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
1832cab237bSDimitry Andric AU.addRequired<AAResultsWrapperPass>();
1842cab237bSDimitry Andric }
1852cab237bSDimitry Andric
1862cab237bSDimitry Andric bool runOnFunction(Function &M) override;
1872cab237bSDimitry Andric };
1882cab237bSDimitry Andric
1892cab237bSDimitry Andric class AMDGPUUseNativeCalls : public FunctionPass {
1902cab237bSDimitry Andric
1912cab237bSDimitry Andric AMDGPULibCalls Simplifier;
1922cab237bSDimitry Andric
1932cab237bSDimitry Andric public:
1942cab237bSDimitry Andric static char ID; // Pass identification
1952cab237bSDimitry Andric
AMDGPUUseNativeCalls()1962cab237bSDimitry Andric AMDGPUUseNativeCalls() : FunctionPass(ID) {
1972cab237bSDimitry Andric initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
1982cab237bSDimitry Andric Simplifier.initNativeFuncs();
1992cab237bSDimitry Andric }
2002cab237bSDimitry Andric
2012cab237bSDimitry Andric bool runOnFunction(Function &F) override;
2022cab237bSDimitry Andric };
2032cab237bSDimitry Andric
2042cab237bSDimitry Andric } // end anonymous namespace.
2052cab237bSDimitry Andric
2062cab237bSDimitry Andric char AMDGPUSimplifyLibCalls::ID = 0;
2072cab237bSDimitry Andric char AMDGPUUseNativeCalls::ID = 0;
2082cab237bSDimitry Andric
2092cab237bSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
2102cab237bSDimitry Andric "Simplify well-known AMD library calls", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)2112cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2122cab237bSDimitry Andric INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
2132cab237bSDimitry Andric "Simplify well-known AMD library calls", false, false)
2142cab237bSDimitry Andric
2152cab237bSDimitry Andric INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
2162cab237bSDimitry Andric "Replace builtin math calls with that native versions.",
2172cab237bSDimitry Andric false, false)
2182cab237bSDimitry Andric
2192cab237bSDimitry Andric template <typename IRB>
2202cab237bSDimitry Andric static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
2212cab237bSDimitry Andric const Twine &Name = "") {
2222cab237bSDimitry Andric CallInst *R = B.CreateCall(Callee, Arg, Name);
2232cab237bSDimitry Andric if (Function* F = dyn_cast<Function>(Callee))
2242cab237bSDimitry Andric R->setCallingConv(F->getCallingConv());
2252cab237bSDimitry Andric return R;
2262cab237bSDimitry Andric }
2272cab237bSDimitry Andric
2282cab237bSDimitry Andric template <typename IRB>
CreateCallEx2(IRB & B,Value * Callee,Value * Arg1,Value * Arg2,const Twine & Name="")2292cab237bSDimitry Andric static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
2302cab237bSDimitry Andric const Twine &Name = "") {
2312cab237bSDimitry Andric CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
2322cab237bSDimitry Andric if (Function* F = dyn_cast<Function>(Callee))
2332cab237bSDimitry Andric R->setCallingConv(F->getCallingConv());
2342cab237bSDimitry Andric return R;
2352cab237bSDimitry Andric }
2362cab237bSDimitry Andric
2372cab237bSDimitry Andric // Data structures for table-driven optimizations.
2382cab237bSDimitry Andric // FuncTbl works for both f32 and f64 functions with 1 input argument
2392cab237bSDimitry Andric
// One row of an optimization table. The member order matters: the tables
// below are brace-initialized as {result, input}.
struct TableEntry {
  double result; // value stored for the matching input
  double input;  // argument value this row applies to
};
2442cab237bSDimitry Andric
2452cab237bSDimitry Andric /* a list of {result, input} */
2462cab237bSDimitry Andric static const TableEntry tbl_acos[] = {
2472cab237bSDimitry Andric {MATH_PI/2.0, 0.0},
2482cab237bSDimitry Andric {MATH_PI/2.0, -0.0},
2492cab237bSDimitry Andric {0.0, 1.0},
2502cab237bSDimitry Andric {MATH_PI, -1.0}
2512cab237bSDimitry Andric };
2522cab237bSDimitry Andric static const TableEntry tbl_acosh[] = {
2532cab237bSDimitry Andric {0.0, 1.0}
2542cab237bSDimitry Andric };
2552cab237bSDimitry Andric static const TableEntry tbl_acospi[] = {
2562cab237bSDimitry Andric {0.5, 0.0},
2572cab237bSDimitry Andric {0.5, -0.0},
2582cab237bSDimitry Andric {0.0, 1.0},
2592cab237bSDimitry Andric {1.0, -1.0}
2602cab237bSDimitry Andric };
2612cab237bSDimitry Andric static const TableEntry tbl_asin[] = {
2622cab237bSDimitry Andric {0.0, 0.0},
2632cab237bSDimitry Andric {-0.0, -0.0},
2642cab237bSDimitry Andric {MATH_PI/2.0, 1.0},
2652cab237bSDimitry Andric {-MATH_PI/2.0, -1.0}
2662cab237bSDimitry Andric };
2672cab237bSDimitry Andric static const TableEntry tbl_asinh[] = {
2682cab237bSDimitry Andric {0.0, 0.0},
2692cab237bSDimitry Andric {-0.0, -0.0}
2702cab237bSDimitry Andric };
2712cab237bSDimitry Andric static const TableEntry tbl_asinpi[] = {
2722cab237bSDimitry Andric {0.0, 0.0},
2732cab237bSDimitry Andric {-0.0, -0.0},
2742cab237bSDimitry Andric {0.5, 1.0},
2752cab237bSDimitry Andric {-0.5, -1.0}
2762cab237bSDimitry Andric };
2772cab237bSDimitry Andric static const TableEntry tbl_atan[] = {
2782cab237bSDimitry Andric {0.0, 0.0},
2792cab237bSDimitry Andric {-0.0, -0.0},
2802cab237bSDimitry Andric {MATH_PI/4.0, 1.0},
2812cab237bSDimitry Andric {-MATH_PI/4.0, -1.0}
2822cab237bSDimitry Andric };
2832cab237bSDimitry Andric static const TableEntry tbl_atanh[] = {
2842cab237bSDimitry Andric {0.0, 0.0},
2852cab237bSDimitry Andric {-0.0, -0.0}
2862cab237bSDimitry Andric };
2872cab237bSDimitry Andric static const TableEntry tbl_atanpi[] = {
2882cab237bSDimitry Andric {0.0, 0.0},
2892cab237bSDimitry Andric {-0.0, -0.0},
2902cab237bSDimitry Andric {0.25, 1.0},
2912cab237bSDimitry Andric {-0.25, -1.0}
2922cab237bSDimitry Andric };
2932cab237bSDimitry Andric static const TableEntry tbl_cbrt[] = {
2942cab237bSDimitry Andric {0.0, 0.0},
2952cab237bSDimitry Andric {-0.0, -0.0},
2962cab237bSDimitry Andric {1.0, 1.0},
2972cab237bSDimitry Andric {-1.0, -1.0},
2982cab237bSDimitry Andric };
2992cab237bSDimitry Andric static const TableEntry tbl_cos[] = {
3002cab237bSDimitry Andric {1.0, 0.0},
3012cab237bSDimitry Andric {1.0, -0.0}
3022cab237bSDimitry Andric };
3032cab237bSDimitry Andric static const TableEntry tbl_cosh[] = {
3042cab237bSDimitry Andric {1.0, 0.0},
3052cab237bSDimitry Andric {1.0, -0.0}
3062cab237bSDimitry Andric };
3072cab237bSDimitry Andric static const TableEntry tbl_cospi[] = {
3082cab237bSDimitry Andric {1.0, 0.0},
3092cab237bSDimitry Andric {1.0, -0.0}
3102cab237bSDimitry Andric };
3112cab237bSDimitry Andric static const TableEntry tbl_erfc[] = {
3122cab237bSDimitry Andric {1.0, 0.0},
3132cab237bSDimitry Andric {1.0, -0.0}
3142cab237bSDimitry Andric };
3152cab237bSDimitry Andric static const TableEntry tbl_erf[] = {
3162cab237bSDimitry Andric {0.0, 0.0},
3172cab237bSDimitry Andric {-0.0, -0.0}
3182cab237bSDimitry Andric };
3192cab237bSDimitry Andric static const TableEntry tbl_exp[] = {
3202cab237bSDimitry Andric {1.0, 0.0},
3212cab237bSDimitry Andric {1.0, -0.0},
3222cab237bSDimitry Andric {MATH_E, 1.0}
3232cab237bSDimitry Andric };
3242cab237bSDimitry Andric static const TableEntry tbl_exp2[] = {
3252cab237bSDimitry Andric {1.0, 0.0},
3262cab237bSDimitry Andric {1.0, -0.0},
3272cab237bSDimitry Andric {2.0, 1.0}
3282cab237bSDimitry Andric };
3292cab237bSDimitry Andric static const TableEntry tbl_exp10[] = {
3302cab237bSDimitry Andric {1.0, 0.0},
3312cab237bSDimitry Andric {1.0, -0.0},
3322cab237bSDimitry Andric {10.0, 1.0}
3332cab237bSDimitry Andric };
3342cab237bSDimitry Andric static const TableEntry tbl_expm1[] = {
3352cab237bSDimitry Andric {0.0, 0.0},
3362cab237bSDimitry Andric {-0.0, -0.0}
3372cab237bSDimitry Andric };
3382cab237bSDimitry Andric static const TableEntry tbl_log[] = {
3392cab237bSDimitry Andric {0.0, 1.0},
3402cab237bSDimitry Andric {1.0, MATH_E}
3412cab237bSDimitry Andric };
3422cab237bSDimitry Andric static const TableEntry tbl_log2[] = {
3432cab237bSDimitry Andric {0.0, 1.0},
3442cab237bSDimitry Andric {1.0, 2.0}
3452cab237bSDimitry Andric };
3462cab237bSDimitry Andric static const TableEntry tbl_log10[] = {
3472cab237bSDimitry Andric {0.0, 1.0},
3482cab237bSDimitry Andric {1.0, 10.0}
3492cab237bSDimitry Andric };
3502cab237bSDimitry Andric static const TableEntry tbl_rsqrt[] = {
3512cab237bSDimitry Andric {1.0, 1.0},
3522cab237bSDimitry Andric {1.0/MATH_SQRT2, 2.0}
3532cab237bSDimitry Andric };
3542cab237bSDimitry Andric static const TableEntry tbl_sin[] = {
3552cab237bSDimitry Andric {0.0, 0.0},
3562cab237bSDimitry Andric {-0.0, -0.0}
3572cab237bSDimitry Andric };
3582cab237bSDimitry Andric static const TableEntry tbl_sinh[] = {
3592cab237bSDimitry Andric {0.0, 0.0},
3602cab237bSDimitry Andric {-0.0, -0.0}
3612cab237bSDimitry Andric };
3622cab237bSDimitry Andric static const TableEntry tbl_sinpi[] = {
3632cab237bSDimitry Andric {0.0, 0.0},
3642cab237bSDimitry Andric {-0.0, -0.0}
3652cab237bSDimitry Andric };
3662cab237bSDimitry Andric static const TableEntry tbl_sqrt[] = {
3672cab237bSDimitry Andric {0.0, 0.0},
3682cab237bSDimitry Andric {1.0, 1.0},
3692cab237bSDimitry Andric {MATH_SQRT2, 2.0}
3702cab237bSDimitry Andric };
3712cab237bSDimitry Andric static const TableEntry tbl_tan[] = {
3722cab237bSDimitry Andric {0.0, 0.0},
3732cab237bSDimitry Andric {-0.0, -0.0}
3742cab237bSDimitry Andric };
3752cab237bSDimitry Andric static const TableEntry tbl_tanh[] = {
3762cab237bSDimitry Andric {0.0, 0.0},
3772cab237bSDimitry Andric {-0.0, -0.0}
3782cab237bSDimitry Andric };
3792cab237bSDimitry Andric static const TableEntry tbl_tanpi[] = {
3802cab237bSDimitry Andric {0.0, 0.0},
3812cab237bSDimitry Andric {-0.0, -0.0}
3822cab237bSDimitry Andric };
3832cab237bSDimitry Andric static const TableEntry tbl_tgamma[] = {
3842cab237bSDimitry Andric {1.0, 1.0},
3852cab237bSDimitry Andric {1.0, 2.0},
3862cab237bSDimitry Andric {2.0, 3.0},
3872cab237bSDimitry Andric {6.0, 4.0}
3882cab237bSDimitry Andric };
3892cab237bSDimitry Andric
HasNative(AMDGPULibFunc::EFuncId id)3902cab237bSDimitry Andric static bool HasNative(AMDGPULibFunc::EFuncId id) {
3912cab237bSDimitry Andric switch(id) {
3922cab237bSDimitry Andric case AMDGPULibFunc::EI_DIVIDE:
3932cab237bSDimitry Andric case AMDGPULibFunc::EI_COS:
3942cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP:
3952cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP2:
3962cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP10:
3972cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG:
3982cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG2:
3992cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG10:
4002cab237bSDimitry Andric case AMDGPULibFunc::EI_POWR:
4012cab237bSDimitry Andric case AMDGPULibFunc::EI_RECIP:
4022cab237bSDimitry Andric case AMDGPULibFunc::EI_RSQRT:
4032cab237bSDimitry Andric case AMDGPULibFunc::EI_SIN:
4042cab237bSDimitry Andric case AMDGPULibFunc::EI_SINCOS:
4052cab237bSDimitry Andric case AMDGPULibFunc::EI_SQRT:
4062cab237bSDimitry Andric case AMDGPULibFunc::EI_TAN:
4072cab237bSDimitry Andric return true;
4082cab237bSDimitry Andric default:;
4092cab237bSDimitry Andric }
4102cab237bSDimitry Andric return false;
4112cab237bSDimitry Andric }
4122cab237bSDimitry Andric
4132cab237bSDimitry Andric struct TableRef {
4142cab237bSDimitry Andric size_t size;
4152cab237bSDimitry Andric const TableEntry *table; // variable size: from 0 to (size - 1)
4162cab237bSDimitry Andric
TableRefTableRef4172cab237bSDimitry Andric TableRef() : size(0), table(nullptr) {}
4182cab237bSDimitry Andric
4192cab237bSDimitry Andric template <size_t N>
TableRefTableRef4202cab237bSDimitry Andric TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
4212cab237bSDimitry Andric };
4222cab237bSDimitry Andric
getOptTable(AMDGPULibFunc::EFuncId id)4232cab237bSDimitry Andric static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
4242cab237bSDimitry Andric switch(id) {
4252cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos);
4262cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh);
4272cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
4282cab237bSDimitry Andric case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin);
4292cab237bSDimitry Andric case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh);
4302cab237bSDimitry Andric case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
4312cab237bSDimitry Andric case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan);
4322cab237bSDimitry Andric case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh);
4332cab237bSDimitry Andric case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
4342cab237bSDimitry Andric case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt);
4352cab237bSDimitry Andric case AMDGPULibFunc::EI_NCOS:
4362cab237bSDimitry Andric case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
4372cab237bSDimitry Andric case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh);
4382cab237bSDimitry Andric case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi);
4392cab237bSDimitry Andric case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc);
4402cab237bSDimitry Andric case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
4412cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
4422cab237bSDimitry Andric case AMDGPULibFunc::EI_NEXP2:
4432cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2);
4442cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10);
4452cab237bSDimitry Andric case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1);
4462cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
4472cab237bSDimitry Andric case AMDGPULibFunc::EI_NLOG2:
4482cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2);
4492cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10);
4502cab237bSDimitry Andric case AMDGPULibFunc::EI_NRSQRT:
4512cab237bSDimitry Andric case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt);
4522cab237bSDimitry Andric case AMDGPULibFunc::EI_NSIN:
4532cab237bSDimitry Andric case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
4542cab237bSDimitry Andric case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh);
4552cab237bSDimitry Andric case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi);
4562cab237bSDimitry Andric case AMDGPULibFunc::EI_NSQRT:
4572cab237bSDimitry Andric case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt);
4582cab237bSDimitry Andric case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
4592cab237bSDimitry Andric case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh);
4602cab237bSDimitry Andric case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi);
4612cab237bSDimitry Andric case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
4622cab237bSDimitry Andric default:;
4632cab237bSDimitry Andric }
4642cab237bSDimitry Andric return TableRef();
4652cab237bSDimitry Andric }
4662cab237bSDimitry Andric
getVecSize(const AMDGPULibFunc & FInfo)4672cab237bSDimitry Andric static inline int getVecSize(const AMDGPULibFunc& FInfo) {
4682cab237bSDimitry Andric return FInfo.getLeads()[0].VectorSize;
4692cab237bSDimitry Andric }
4702cab237bSDimitry Andric
// Argument type recorded for the first leading argument of FInfo, converted
// to the EType enumeration.
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
  const auto &FirstLead = FInfo.getLeads()[0];
  return static_cast<AMDGPULibFunc::EType>(FirstLead.ArgType);
}
4742cab237bSDimitry Andric
getFunction(Module * M,const FuncInfo & fInfo)4752cab237bSDimitry Andric Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
4762cab237bSDimitry Andric // If we are doing PreLinkOpt, the function is external. So it is safe to
4772cab237bSDimitry Andric // use getOrInsertFunction() at this stage.
4782cab237bSDimitry Andric
4792cab237bSDimitry Andric return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4802cab237bSDimitry Andric : AMDGPULibFunc::getFunction(M, fInfo);
4812cab237bSDimitry Andric }
4822cab237bSDimitry Andric
parseFunctionName(const StringRef & FMangledName,FuncInfo * FInfo)4832cab237bSDimitry Andric bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
4842cab237bSDimitry Andric FuncInfo *FInfo) {
4852cab237bSDimitry Andric return AMDGPULibFunc::parse(FMangledName, *FInfo);
4862cab237bSDimitry Andric }
4872cab237bSDimitry Andric
isUnsafeMath(const CallInst * CI) const4882cab237bSDimitry Andric bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
4892cab237bSDimitry Andric if (auto Op = dyn_cast<FPMathOperator>(CI))
4902cab237bSDimitry Andric if (Op->isFast())
4912cab237bSDimitry Andric return true;
4922cab237bSDimitry Andric const Function *F = CI->getParent()->getParent();
4932cab237bSDimitry Andric Attribute Attr = F->getFnAttribute("unsafe-fp-math");
4942cab237bSDimitry Andric return Attr.getValueAsString() == "true";
4952cab237bSDimitry Andric }
4962cab237bSDimitry Andric
useNativeFunc(const StringRef F) const4972cab237bSDimitry Andric bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
4982cab237bSDimitry Andric return AllNative ||
4992cab237bSDimitry Andric std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
5002cab237bSDimitry Andric }
5012cab237bSDimitry Andric
initNativeFuncs()5022cab237bSDimitry Andric void AMDGPULibCalls::initNativeFuncs() {
5032cab237bSDimitry Andric AllNative = useNativeFunc("all") ||
5042cab237bSDimitry Andric (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
5052cab237bSDimitry Andric UseNative.begin()->empty());
5062cab237bSDimitry Andric }
5072cab237bSDimitry Andric
sincosUseNative(CallInst * aCI,const FuncInfo & FInfo)5082cab237bSDimitry Andric bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
5092cab237bSDimitry Andric bool native_sin = useNativeFunc("sin");
5102cab237bSDimitry Andric bool native_cos = useNativeFunc("cos");
5112cab237bSDimitry Andric
5122cab237bSDimitry Andric if (native_sin && native_cos) {
5132cab237bSDimitry Andric Module *M = aCI->getModule();
5142cab237bSDimitry Andric Value *opr0 = aCI->getArgOperand(0);
5152cab237bSDimitry Andric
5162cab237bSDimitry Andric AMDGPULibFunc nf;
5172cab237bSDimitry Andric nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
5182cab237bSDimitry Andric nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
5192cab237bSDimitry Andric
5202cab237bSDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE);
5212cab237bSDimitry Andric nf.setId(AMDGPULibFunc::EI_SIN);
5222cab237bSDimitry Andric Constant *sinExpr = getFunction(M, nf);
5232cab237bSDimitry Andric
5242cab237bSDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE);
5252cab237bSDimitry Andric nf.setId(AMDGPULibFunc::EI_COS);
5262cab237bSDimitry Andric Constant *cosExpr = getFunction(M, nf);
5272cab237bSDimitry Andric if (sinExpr && cosExpr) {
5282cab237bSDimitry Andric Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
5292cab237bSDimitry Andric Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
5302cab237bSDimitry Andric new StoreInst(cosval, aCI->getArgOperand(1), aCI);
5312cab237bSDimitry Andric
5322cab237bSDimitry Andric DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5332cab237bSDimitry Andric << " with native version of sin/cos");
5342cab237bSDimitry Andric
5352cab237bSDimitry Andric replaceCall(sinval);
5362cab237bSDimitry Andric return true;
5372cab237bSDimitry Andric }
5382cab237bSDimitry Andric }
5392cab237bSDimitry Andric return false;
5402cab237bSDimitry Andric }
5412cab237bSDimitry Andric
useNative(CallInst * aCI)5422cab237bSDimitry Andric bool AMDGPULibCalls::useNative(CallInst *aCI) {
5432cab237bSDimitry Andric CI = aCI;
5442cab237bSDimitry Andric Function *Callee = aCI->getCalledFunction();
5452cab237bSDimitry Andric
5462cab237bSDimitry Andric FuncInfo FInfo;
5472cab237bSDimitry Andric if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
5482cab237bSDimitry Andric FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
5492cab237bSDimitry Andric getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
5502cab237bSDimitry Andric !(AllNative || useNativeFunc(FInfo.getName()))) {
5512cab237bSDimitry Andric return false;
5522cab237bSDimitry Andric }
5532cab237bSDimitry Andric
5542cab237bSDimitry Andric if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5552cab237bSDimitry Andric return sincosUseNative(aCI, FInfo);
5562cab237bSDimitry Andric
5572cab237bSDimitry Andric FInfo.setPrefix(AMDGPULibFunc::NATIVE);
5582cab237bSDimitry Andric Constant *F = getFunction(aCI->getModule(), FInfo);
5592cab237bSDimitry Andric if (!F)
5602cab237bSDimitry Andric return false;
5612cab237bSDimitry Andric
5622cab237bSDimitry Andric aCI->setCalledFunction(F);
5632cab237bSDimitry Andric DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5642cab237bSDimitry Andric << " with native version");
5652cab237bSDimitry Andric return true;
5662cab237bSDimitry Andric }
5672cab237bSDimitry Andric
5682cab237bSDimitry Andric // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
5692cab237bSDimitry Andric // builtin, with appended type size and alignment arguments, where 2 or 4
5702cab237bSDimitry Andric // indicates the original number of arguments. The library has optimized version
5712cab237bSDimitry Andric // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
5722cab237bSDimitry Andric // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
5732cab237bSDimitry Andric // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
5742cab237bSDimitry Andric // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
fold_read_write_pipe(CallInst * CI,IRBuilder<> & B,FuncInfo & FInfo)5752cab237bSDimitry Andric bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
5762cab237bSDimitry Andric FuncInfo &FInfo) {
5772cab237bSDimitry Andric auto *Callee = CI->getCalledFunction();
5782cab237bSDimitry Andric if (!Callee->isDeclaration())
5792cab237bSDimitry Andric return false;
5802cab237bSDimitry Andric
5812cab237bSDimitry Andric assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
5822cab237bSDimitry Andric auto *M = Callee->getParent();
5832cab237bSDimitry Andric auto &Ctx = M->getContext();
5842cab237bSDimitry Andric std::string Name = Callee->getName();
5852cab237bSDimitry Andric auto NumArg = CI->getNumArgOperands();
5862cab237bSDimitry Andric if (NumArg != 4 && NumArg != 6)
5872cab237bSDimitry Andric return false;
5882cab237bSDimitry Andric auto *PacketSize = CI->getArgOperand(NumArg - 2);
5892cab237bSDimitry Andric auto *PacketAlign = CI->getArgOperand(NumArg - 1);
5902cab237bSDimitry Andric if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
5912cab237bSDimitry Andric return false;
5922cab237bSDimitry Andric unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
5932cab237bSDimitry Andric unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
5942cab237bSDimitry Andric if (Size != Align || !isPowerOf2_32(Size))
5952cab237bSDimitry Andric return false;
5962cab237bSDimitry Andric
5972cab237bSDimitry Andric Type *PtrElemTy;
5982cab237bSDimitry Andric if (Size <= 8)
5992cab237bSDimitry Andric PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
6002cab237bSDimitry Andric else
6012cab237bSDimitry Andric PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
6022cab237bSDimitry Andric unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
6032cab237bSDimitry Andric auto PtrArg = CI->getArgOperand(PtrArgLoc);
6042cab237bSDimitry Andric unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
6052cab237bSDimitry Andric auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
6062cab237bSDimitry Andric
6072cab237bSDimitry Andric SmallVector<llvm::Type *, 6> ArgTys;
6082cab237bSDimitry Andric for (unsigned I = 0; I != PtrArgLoc; ++I)
6092cab237bSDimitry Andric ArgTys.push_back(CI->getArgOperand(I)->getType());
6102cab237bSDimitry Andric ArgTys.push_back(PtrTy);
6112cab237bSDimitry Andric
6122cab237bSDimitry Andric Name = Name + "_" + std::to_string(Size);
6132cab237bSDimitry Andric auto *FTy = FunctionType::get(Callee->getReturnType(),
6142cab237bSDimitry Andric ArrayRef<Type *>(ArgTys), false);
6152cab237bSDimitry Andric AMDGPULibFunc NewLibFunc(Name, FTy);
6162cab237bSDimitry Andric auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
6172cab237bSDimitry Andric if (!F)
6182cab237bSDimitry Andric return false;
6192cab237bSDimitry Andric
6202cab237bSDimitry Andric auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
6212cab237bSDimitry Andric SmallVector<Value *, 6> Args;
6222cab237bSDimitry Andric for (unsigned I = 0; I != PtrArgLoc; ++I)
6232cab237bSDimitry Andric Args.push_back(CI->getArgOperand(I));
6242cab237bSDimitry Andric Args.push_back(BCast);
6252cab237bSDimitry Andric
6262cab237bSDimitry Andric auto *NCI = B.CreateCall(F, Args);
6272cab237bSDimitry Andric NCI->setAttributes(CI->getAttributes());
6282cab237bSDimitry Andric CI->replaceAllUsesWith(NCI);
6292cab237bSDimitry Andric CI->dropAllReferences();
6302cab237bSDimitry Andric CI->eraseFromParent();
6312cab237bSDimitry Andric
6322cab237bSDimitry Andric return true;
6332cab237bSDimitry Andric }
6342cab237bSDimitry Andric
6352cab237bSDimitry Andric // This function returns false if no change; return true otherwise.
fold(CallInst * CI,AliasAnalysis * AA)6362cab237bSDimitry Andric bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
6372cab237bSDimitry Andric this->CI = CI;
6382cab237bSDimitry Andric Function *Callee = CI->getCalledFunction();
6392cab237bSDimitry Andric
6402cab237bSDimitry Andric // Ignore indirect calls.
6412cab237bSDimitry Andric if (Callee == 0) return false;
6422cab237bSDimitry Andric
6432cab237bSDimitry Andric FuncInfo FInfo;
6442cab237bSDimitry Andric if (!parseFunctionName(Callee->getName(), &FInfo))
6452cab237bSDimitry Andric return false;
6462cab237bSDimitry Andric
6472cab237bSDimitry Andric // Further check the number of arguments to see if they match.
6482cab237bSDimitry Andric if (CI->getNumArgOperands() != FInfo.getNumArgs())
6492cab237bSDimitry Andric return false;
6502cab237bSDimitry Andric
6512cab237bSDimitry Andric BasicBlock *BB = CI->getParent();
6522cab237bSDimitry Andric LLVMContext &Context = CI->getParent()->getContext();
6532cab237bSDimitry Andric IRBuilder<> B(Context);
6542cab237bSDimitry Andric
6552cab237bSDimitry Andric // Set the builder to the instruction after the call.
6562cab237bSDimitry Andric B.SetInsertPoint(BB, CI->getIterator());
6572cab237bSDimitry Andric
6582cab237bSDimitry Andric // Copy fast flags from the original call.
6592cab237bSDimitry Andric if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
6602cab237bSDimitry Andric B.setFastMathFlags(FPOp->getFastMathFlags());
6612cab237bSDimitry Andric
6622cab237bSDimitry Andric if (TDOFold(CI, FInfo))
6632cab237bSDimitry Andric return true;
6642cab237bSDimitry Andric
6652cab237bSDimitry Andric // Under unsafe-math, evaluate calls if possible.
6662cab237bSDimitry Andric // According to Brian Sumner, we can do this for all f32 function calls
6672cab237bSDimitry Andric // using host's double function calls.
6682cab237bSDimitry Andric if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
6692cab237bSDimitry Andric return true;
6702cab237bSDimitry Andric
6712cab237bSDimitry Andric // Specilized optimizations for each function call
6722cab237bSDimitry Andric switch (FInfo.getId()) {
6732cab237bSDimitry Andric case AMDGPULibFunc::EI_RECIP:
6742cab237bSDimitry Andric // skip vector function
6752cab237bSDimitry Andric assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6762cab237bSDimitry Andric FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6772cab237bSDimitry Andric "recip must be an either native or half function");
6782cab237bSDimitry Andric return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
6792cab237bSDimitry Andric
6802cab237bSDimitry Andric case AMDGPULibFunc::EI_DIVIDE:
6812cab237bSDimitry Andric // skip vector function
6822cab237bSDimitry Andric assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
6832cab237bSDimitry Andric FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
6842cab237bSDimitry Andric "divide must be an either native or half function");
6852cab237bSDimitry Andric return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
6862cab237bSDimitry Andric
6872cab237bSDimitry Andric case AMDGPULibFunc::EI_POW:
6882cab237bSDimitry Andric case AMDGPULibFunc::EI_POWR:
6892cab237bSDimitry Andric case AMDGPULibFunc::EI_POWN:
6902cab237bSDimitry Andric return fold_pow(CI, B, FInfo);
6912cab237bSDimitry Andric
6922cab237bSDimitry Andric case AMDGPULibFunc::EI_ROOTN:
6932cab237bSDimitry Andric // skip vector function
6942cab237bSDimitry Andric return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
6952cab237bSDimitry Andric
6962cab237bSDimitry Andric case AMDGPULibFunc::EI_FMA:
6972cab237bSDimitry Andric case AMDGPULibFunc::EI_MAD:
6982cab237bSDimitry Andric case AMDGPULibFunc::EI_NFMA:
6992cab237bSDimitry Andric // skip vector function
7002cab237bSDimitry Andric return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
7012cab237bSDimitry Andric
7022cab237bSDimitry Andric case AMDGPULibFunc::EI_SQRT:
7032cab237bSDimitry Andric return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
7042cab237bSDimitry Andric case AMDGPULibFunc::EI_COS:
7052cab237bSDimitry Andric case AMDGPULibFunc::EI_SIN:
7062cab237bSDimitry Andric if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
7072cab237bSDimitry Andric getArgType(FInfo) == AMDGPULibFunc::F64)
7082cab237bSDimitry Andric && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
7092cab237bSDimitry Andric return fold_sincos(CI, B, AA);
7102cab237bSDimitry Andric
7112cab237bSDimitry Andric break;
7122cab237bSDimitry Andric case AMDGPULibFunc::EI_READ_PIPE_2:
7132cab237bSDimitry Andric case AMDGPULibFunc::EI_READ_PIPE_4:
7142cab237bSDimitry Andric case AMDGPULibFunc::EI_WRITE_PIPE_2:
7152cab237bSDimitry Andric case AMDGPULibFunc::EI_WRITE_PIPE_4:
7162cab237bSDimitry Andric return fold_read_write_pipe(CI, B, FInfo);
7172cab237bSDimitry Andric
7182cab237bSDimitry Andric default:
7192cab237bSDimitry Andric break;
7202cab237bSDimitry Andric }
7212cab237bSDimitry Andric
7222cab237bSDimitry Andric return false;
7232cab237bSDimitry Andric }
7242cab237bSDimitry Andric
TDOFold(CallInst * CI,const FuncInfo & FInfo)7252cab237bSDimitry Andric bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
7262cab237bSDimitry Andric // Table-Driven optimization
7272cab237bSDimitry Andric const TableRef tr = getOptTable(FInfo.getId());
7282cab237bSDimitry Andric if (tr.size==0)
7292cab237bSDimitry Andric return false;
7302cab237bSDimitry Andric
7312cab237bSDimitry Andric int const sz = (int)tr.size;
7322cab237bSDimitry Andric const TableEntry * const ftbl = tr.table;
7332cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
7342cab237bSDimitry Andric
7352cab237bSDimitry Andric if (getVecSize(FInfo) > 1) {
7362cab237bSDimitry Andric if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
7372cab237bSDimitry Andric SmallVector<double, 0> DVal;
7382cab237bSDimitry Andric for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
7392cab237bSDimitry Andric ConstantFP *eltval = dyn_cast<ConstantFP>(
7402cab237bSDimitry Andric CV->getElementAsConstant((unsigned)eltNo));
7412cab237bSDimitry Andric assert(eltval && "Non-FP arguments in math function!");
7422cab237bSDimitry Andric bool found = false;
7432cab237bSDimitry Andric for (int i=0; i < sz; ++i) {
7442cab237bSDimitry Andric if (eltval->isExactlyValue(ftbl[i].input)) {
7452cab237bSDimitry Andric DVal.push_back(ftbl[i].result);
7462cab237bSDimitry Andric found = true;
7472cab237bSDimitry Andric break;
7482cab237bSDimitry Andric }
7492cab237bSDimitry Andric }
7502cab237bSDimitry Andric if (!found) {
7512cab237bSDimitry Andric // This vector constants not handled yet.
7522cab237bSDimitry Andric return false;
7532cab237bSDimitry Andric }
7542cab237bSDimitry Andric }
7552cab237bSDimitry Andric LLVMContext &context = CI->getParent()->getParent()->getContext();
7562cab237bSDimitry Andric Constant *nval;
7572cab237bSDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) {
7582cab237bSDimitry Andric SmallVector<float, 0> FVal;
7592cab237bSDimitry Andric for (unsigned i = 0; i < DVal.size(); ++i) {
7602cab237bSDimitry Andric FVal.push_back((float)DVal[i]);
7612cab237bSDimitry Andric }
7622cab237bSDimitry Andric ArrayRef<float> tmp(FVal);
7632cab237bSDimitry Andric nval = ConstantDataVector::get(context, tmp);
7642cab237bSDimitry Andric } else { // F64
7652cab237bSDimitry Andric ArrayRef<double> tmp(DVal);
7662cab237bSDimitry Andric nval = ConstantDataVector::get(context, tmp);
7672cab237bSDimitry Andric }
7684ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7692cab237bSDimitry Andric replaceCall(nval);
7702cab237bSDimitry Andric return true;
7712cab237bSDimitry Andric }
7722cab237bSDimitry Andric } else {
7732cab237bSDimitry Andric // Scalar version
7742cab237bSDimitry Andric if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
7752cab237bSDimitry Andric for (int i = 0; i < sz; ++i) {
7762cab237bSDimitry Andric if (CF->isExactlyValue(ftbl[i].input)) {
7772cab237bSDimitry Andric Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
7784ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
7792cab237bSDimitry Andric replaceCall(nval);
7802cab237bSDimitry Andric return true;
7812cab237bSDimitry Andric }
7822cab237bSDimitry Andric }
7832cab237bSDimitry Andric }
7842cab237bSDimitry Andric }
7852cab237bSDimitry Andric
7862cab237bSDimitry Andric return false;
7872cab237bSDimitry Andric }
7882cab237bSDimitry Andric
replaceWithNative(CallInst * CI,const FuncInfo & FInfo)7892cab237bSDimitry Andric bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
7902cab237bSDimitry Andric Module *M = CI->getModule();
7912cab237bSDimitry Andric if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
7922cab237bSDimitry Andric FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
7932cab237bSDimitry Andric !HasNative(FInfo.getId()))
7942cab237bSDimitry Andric return false;
7952cab237bSDimitry Andric
7962cab237bSDimitry Andric AMDGPULibFunc nf = FInfo;
7972cab237bSDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE);
7982cab237bSDimitry Andric if (Constant *FPExpr = getFunction(M, nf)) {
7994ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
8002cab237bSDimitry Andric
8012cab237bSDimitry Andric CI->setCalledFunction(FPExpr);
8022cab237bSDimitry Andric
8034ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << *CI << '\n');
8042cab237bSDimitry Andric
8052cab237bSDimitry Andric return true;
8062cab237bSDimitry Andric }
8072cab237bSDimitry Andric return false;
8082cab237bSDimitry Andric }
8092cab237bSDimitry Andric
8102cab237bSDimitry Andric // [native_]half_recip(c) ==> 1.0/c
fold_recip(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8112cab237bSDimitry Andric bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
8122cab237bSDimitry Andric const FuncInfo &FInfo) {
8132cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
8142cab237bSDimitry Andric if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
8152cab237bSDimitry Andric // Just create a normal div. Later, InstCombine will be able
8162cab237bSDimitry Andric // to compute the divide into a constant (avoid check float infinity
8172cab237bSDimitry Andric // or subnormal at this point).
8182cab237bSDimitry Andric Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
8192cab237bSDimitry Andric opr0,
8202cab237bSDimitry Andric "recip2div");
8214ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8222cab237bSDimitry Andric replaceCall(nval);
8232cab237bSDimitry Andric return true;
8242cab237bSDimitry Andric }
8252cab237bSDimitry Andric return false;
8262cab237bSDimitry Andric }
8272cab237bSDimitry Andric
8282cab237bSDimitry Andric // [native_]half_divide(x, c) ==> x/c
fold_divide(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8292cab237bSDimitry Andric bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
8302cab237bSDimitry Andric const FuncInfo &FInfo) {
8312cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
8322cab237bSDimitry Andric Value *opr1 = CI->getArgOperand(1);
8332cab237bSDimitry Andric ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
8342cab237bSDimitry Andric ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
8352cab237bSDimitry Andric
8362cab237bSDimitry Andric if ((CF0 && CF1) || // both are constants
8372cab237bSDimitry Andric (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
8382cab237bSDimitry Andric // CF1 is constant && f32 divide
8392cab237bSDimitry Andric {
8402cab237bSDimitry Andric Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
8412cab237bSDimitry Andric opr1, "__div2recip");
8422cab237bSDimitry Andric Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
8432cab237bSDimitry Andric replaceCall(nval);
8442cab237bSDimitry Andric return true;
8452cab237bSDimitry Andric }
8462cab237bSDimitry Andric return false;
8472cab237bSDimitry Andric }
8482cab237bSDimitry Andric
namespace llvm {
// Host-side base-2 logarithm of \p V. Uses the C library's log2 when the
// feature-test macros below indicate it is available; otherwise computes it
// as log(V) / ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  const double Ln2 = 0.693147180559945309417;
  return log(V) / Ln2;
#endif
}
} // end namespace llvm
8582cab237bSDimitry Andric
fold_pow(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)8592cab237bSDimitry Andric bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
8602cab237bSDimitry Andric const FuncInfo &FInfo) {
8612cab237bSDimitry Andric assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
8622cab237bSDimitry Andric FInfo.getId() == AMDGPULibFunc::EI_POWR ||
8632cab237bSDimitry Andric FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
8642cab237bSDimitry Andric "fold_pow: encounter a wrong function call");
8652cab237bSDimitry Andric
8662cab237bSDimitry Andric Value *opr0, *opr1;
8672cab237bSDimitry Andric ConstantFP *CF;
8682cab237bSDimitry Andric ConstantInt *CINT;
8692cab237bSDimitry Andric ConstantAggregateZero *CZero;
8702cab237bSDimitry Andric Type *eltType;
8712cab237bSDimitry Andric
8722cab237bSDimitry Andric opr0 = CI->getArgOperand(0);
8732cab237bSDimitry Andric opr1 = CI->getArgOperand(1);
8742cab237bSDimitry Andric CZero = dyn_cast<ConstantAggregateZero>(opr1);
8752cab237bSDimitry Andric if (getVecSize(FInfo) == 1) {
8762cab237bSDimitry Andric eltType = opr0->getType();
8772cab237bSDimitry Andric CF = dyn_cast<ConstantFP>(opr1);
8782cab237bSDimitry Andric CINT = dyn_cast<ConstantInt>(opr1);
8792cab237bSDimitry Andric } else {
8802cab237bSDimitry Andric VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
8812cab237bSDimitry Andric assert(VTy && "Oprand of vector function should be of vectortype");
8822cab237bSDimitry Andric eltType = VTy->getElementType();
8832cab237bSDimitry Andric ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
8842cab237bSDimitry Andric
8852cab237bSDimitry Andric // Now, only Handle vector const whose elements have the same value.
8862cab237bSDimitry Andric CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
8872cab237bSDimitry Andric CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
8882cab237bSDimitry Andric }
8892cab237bSDimitry Andric
8902cab237bSDimitry Andric // No unsafe math , no constant argument, do nothing
8912cab237bSDimitry Andric if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
8922cab237bSDimitry Andric return false;
8932cab237bSDimitry Andric
8942cab237bSDimitry Andric // 0x1111111 means that we don't do anything for this call.
8952cab237bSDimitry Andric int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
8962cab237bSDimitry Andric
8972cab237bSDimitry Andric if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
8982cab237bSDimitry Andric // pow/powr/pown(x, 0) == 1
8994ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
9002cab237bSDimitry Andric Constant *cnval = ConstantFP::get(eltType, 1.0);
9012cab237bSDimitry Andric if (getVecSize(FInfo) > 1) {
9022cab237bSDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9032cab237bSDimitry Andric }
9042cab237bSDimitry Andric replaceCall(cnval);
9052cab237bSDimitry Andric return true;
9062cab237bSDimitry Andric }
9072cab237bSDimitry Andric if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
9082cab237bSDimitry Andric // pow/powr/pown(x, 1.0) = x
9094ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
9102cab237bSDimitry Andric replaceCall(opr0);
9112cab237bSDimitry Andric return true;
9122cab237bSDimitry Andric }
9132cab237bSDimitry Andric if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
9142cab237bSDimitry Andric // pow/powr/pown(x, 2.0) = x*x
9154ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
9164ba319b5SDimitry Andric << "\n");
9172cab237bSDimitry Andric Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
9182cab237bSDimitry Andric replaceCall(nval);
9192cab237bSDimitry Andric return true;
9202cab237bSDimitry Andric }
9212cab237bSDimitry Andric if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
9222cab237bSDimitry Andric // pow/powr/pown(x, -1.0) = 1.0/x
9234ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
9242cab237bSDimitry Andric Constant *cnval = ConstantFP::get(eltType, 1.0);
9252cab237bSDimitry Andric if (getVecSize(FInfo) > 1) {
9262cab237bSDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9272cab237bSDimitry Andric }
9282cab237bSDimitry Andric Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
9292cab237bSDimitry Andric replaceCall(nval);
9302cab237bSDimitry Andric return true;
9312cab237bSDimitry Andric }
9322cab237bSDimitry Andric
9332cab237bSDimitry Andric Module *M = CI->getModule();
9342cab237bSDimitry Andric if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
9352cab237bSDimitry Andric // pow[r](x, [-]0.5) = sqrt(x)
9362cab237bSDimitry Andric bool issqrt = CF->isExactlyValue(0.5);
9372cab237bSDimitry Andric if (Constant *FPExpr = getFunction(M,
9382cab237bSDimitry Andric AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
9392cab237bSDimitry Andric : AMDGPULibFunc::EI_RSQRT, FInfo))) {
9404ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
9412cab237bSDimitry Andric << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
9422cab237bSDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
9432cab237bSDimitry Andric : "__pow2rsqrt");
9442cab237bSDimitry Andric replaceCall(nval);
9452cab237bSDimitry Andric return true;
9462cab237bSDimitry Andric }
9472cab237bSDimitry Andric }
9482cab237bSDimitry Andric
9492cab237bSDimitry Andric if (!isUnsafeMath(CI))
9502cab237bSDimitry Andric return false;
9512cab237bSDimitry Andric
9522cab237bSDimitry Andric // Unsafe Math optimization
9532cab237bSDimitry Andric
9542cab237bSDimitry Andric // Remember that ci_opr1 is set if opr1 is integral
9552cab237bSDimitry Andric if (CF) {
9562cab237bSDimitry Andric double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9572cab237bSDimitry Andric ? (double)CF->getValueAPF().convertToFloat()
9582cab237bSDimitry Andric : CF->getValueAPF().convertToDouble();
9592cab237bSDimitry Andric int ival = (int)dval;
9602cab237bSDimitry Andric if ((double)ival == dval) {
9612cab237bSDimitry Andric ci_opr1 = ival;
9622cab237bSDimitry Andric } else
9632cab237bSDimitry Andric ci_opr1 = 0x11111111;
9642cab237bSDimitry Andric }
9652cab237bSDimitry Andric
9662cab237bSDimitry Andric // pow/powr/pown(x, c) = [1/](x*x*..x); where
9672cab237bSDimitry Andric // trunc(c) == c && the number of x == c && |c| <= 12
9682cab237bSDimitry Andric unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9692cab237bSDimitry Andric if (abs_opr1 <= 12) {
9702cab237bSDimitry Andric Constant *cnval;
9712cab237bSDimitry Andric Value *nval;
9722cab237bSDimitry Andric if (abs_opr1 == 0) {
9732cab237bSDimitry Andric cnval = ConstantFP::get(eltType, 1.0);
9742cab237bSDimitry Andric if (getVecSize(FInfo) > 1) {
9752cab237bSDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9762cab237bSDimitry Andric }
9772cab237bSDimitry Andric nval = cnval;
9782cab237bSDimitry Andric } else {
9792cab237bSDimitry Andric Value *valx2 = nullptr;
9802cab237bSDimitry Andric nval = nullptr;
9812cab237bSDimitry Andric while (abs_opr1 > 0) {
9822cab237bSDimitry Andric valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
9832cab237bSDimitry Andric if (abs_opr1 & 1) {
9842cab237bSDimitry Andric nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
9852cab237bSDimitry Andric }
9862cab237bSDimitry Andric abs_opr1 >>= 1;
9872cab237bSDimitry Andric }
9882cab237bSDimitry Andric }
9892cab237bSDimitry Andric
9902cab237bSDimitry Andric if (ci_opr1 < 0) {
9912cab237bSDimitry Andric cnval = ConstantFP::get(eltType, 1.0);
9922cab237bSDimitry Andric if (getVecSize(FInfo) > 1) {
9932cab237bSDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9942cab237bSDimitry Andric }
9952cab237bSDimitry Andric nval = B.CreateFDiv(cnval, nval, "__1powprod");
9962cab237bSDimitry Andric }
9974ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
9984ba319b5SDimitry Andric << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
9994ba319b5SDimitry Andric << ")\n");
10002cab237bSDimitry Andric replaceCall(nval);
10012cab237bSDimitry Andric return true;
10022cab237bSDimitry Andric }
10032cab237bSDimitry Andric
10042cab237bSDimitry Andric // powr ---> exp2(y * log2(x))
10052cab237bSDimitry Andric // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
10062cab237bSDimitry Andric Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
10072cab237bSDimitry Andric FInfo));
10082cab237bSDimitry Andric if (!ExpExpr)
10092cab237bSDimitry Andric return false;
10102cab237bSDimitry Andric
10112cab237bSDimitry Andric bool needlog = false;
10122cab237bSDimitry Andric bool needabs = false;
10132cab237bSDimitry Andric bool needcopysign = false;
10142cab237bSDimitry Andric Constant *cnval = nullptr;
10152cab237bSDimitry Andric if (getVecSize(FInfo) == 1) {
10162cab237bSDimitry Andric CF = dyn_cast<ConstantFP>(opr0);
10172cab237bSDimitry Andric
10182cab237bSDimitry Andric if (CF) {
10192cab237bSDimitry Andric double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10202cab237bSDimitry Andric ? (double)CF->getValueAPF().convertToFloat()
10212cab237bSDimitry Andric : CF->getValueAPF().convertToDouble();
10222cab237bSDimitry Andric
10232cab237bSDimitry Andric V = log2(std::abs(V));
10242cab237bSDimitry Andric cnval = ConstantFP::get(eltType, V);
10252cab237bSDimitry Andric needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
10262cab237bSDimitry Andric CF->isNegative();
10272cab237bSDimitry Andric } else {
10282cab237bSDimitry Andric needlog = true;
10292cab237bSDimitry Andric needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
10302cab237bSDimitry Andric (!CF || CF->isNegative());
10312cab237bSDimitry Andric }
10322cab237bSDimitry Andric } else {
10332cab237bSDimitry Andric ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
10342cab237bSDimitry Andric
10352cab237bSDimitry Andric if (!CDV) {
10362cab237bSDimitry Andric needlog = true;
10372cab237bSDimitry Andric needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10382cab237bSDimitry Andric } else {
10392cab237bSDimitry Andric assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
10402cab237bSDimitry Andric "Wrong vector size detected");
10412cab237bSDimitry Andric
10422cab237bSDimitry Andric SmallVector<double, 0> DVal;
10432cab237bSDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i) {
10442cab237bSDimitry Andric double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10452cab237bSDimitry Andric ? (double)CDV->getElementAsFloat(i)
10462cab237bSDimitry Andric : CDV->getElementAsDouble(i);
10472cab237bSDimitry Andric if (V < 0.0) needcopysign = true;
10482cab237bSDimitry Andric V = log2(std::abs(V));
10492cab237bSDimitry Andric DVal.push_back(V);
10502cab237bSDimitry Andric }
10512cab237bSDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) {
10522cab237bSDimitry Andric SmallVector<float, 0> FVal;
10532cab237bSDimitry Andric for (unsigned i=0; i < DVal.size(); ++i) {
10542cab237bSDimitry Andric FVal.push_back((float)DVal[i]);
10552cab237bSDimitry Andric }
10562cab237bSDimitry Andric ArrayRef<float> tmp(FVal);
10572cab237bSDimitry Andric cnval = ConstantDataVector::get(M->getContext(), tmp);
10582cab237bSDimitry Andric } else {
10592cab237bSDimitry Andric ArrayRef<double> tmp(DVal);
10602cab237bSDimitry Andric cnval = ConstantDataVector::get(M->getContext(), tmp);
10612cab237bSDimitry Andric }
10622cab237bSDimitry Andric }
10632cab237bSDimitry Andric }
10642cab237bSDimitry Andric
10652cab237bSDimitry Andric if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10662cab237bSDimitry Andric // We cannot handle corner cases for a general pow() function, give up
10672cab237bSDimitry Andric // unless y is a constant integral value. Then proceed as if it were pown.
10682cab237bSDimitry Andric if (getVecSize(FInfo) == 1) {
10692cab237bSDimitry Andric if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
10702cab237bSDimitry Andric double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10712cab237bSDimitry Andric ? (double)CF->getValueAPF().convertToFloat()
10722cab237bSDimitry Andric : CF->getValueAPF().convertToDouble();
10732cab237bSDimitry Andric if (y != (double)(int64_t)y)
10742cab237bSDimitry Andric return false;
10752cab237bSDimitry Andric } else
10762cab237bSDimitry Andric return false;
10772cab237bSDimitry Andric } else {
10782cab237bSDimitry Andric if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
10792cab237bSDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i) {
10802cab237bSDimitry Andric double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
10812cab237bSDimitry Andric ? (double)CDV->getElementAsFloat(i)
10822cab237bSDimitry Andric : CDV->getElementAsDouble(i);
10832cab237bSDimitry Andric if (y != (double)(int64_t)y)
10842cab237bSDimitry Andric return false;
10852cab237bSDimitry Andric }
10862cab237bSDimitry Andric } else
10872cab237bSDimitry Andric return false;
10882cab237bSDimitry Andric }
10892cab237bSDimitry Andric }
10902cab237bSDimitry Andric
10912cab237bSDimitry Andric Value *nval;
10922cab237bSDimitry Andric if (needabs) {
10932cab237bSDimitry Andric Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
10942cab237bSDimitry Andric FInfo));
10952cab237bSDimitry Andric if (!AbsExpr)
10962cab237bSDimitry Andric return false;
10972cab237bSDimitry Andric nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
10982cab237bSDimitry Andric } else {
10992cab237bSDimitry Andric nval = cnval ? cnval : opr0;
11002cab237bSDimitry Andric }
11012cab237bSDimitry Andric if (needlog) {
11022cab237bSDimitry Andric Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
11032cab237bSDimitry Andric FInfo));
11042cab237bSDimitry Andric if (!LogExpr)
11052cab237bSDimitry Andric return false;
11062cab237bSDimitry Andric nval = CreateCallEx(B,LogExpr, nval, "__log2");
11072cab237bSDimitry Andric }
11082cab237bSDimitry Andric
11092cab237bSDimitry Andric if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
11102cab237bSDimitry Andric // convert int(32) to fp(f32 or f64)
11112cab237bSDimitry Andric opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
11122cab237bSDimitry Andric }
11132cab237bSDimitry Andric nval = B.CreateFMul(opr1, nval, "__ylogx");
11142cab237bSDimitry Andric nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11152cab237bSDimitry Andric
11162cab237bSDimitry Andric if (needcopysign) {
11172cab237bSDimitry Andric Value *opr_n;
11182cab237bSDimitry Andric Type* rTy = opr0->getType();
11192cab237bSDimitry Andric Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
11202cab237bSDimitry Andric Type *nTy = nTyS;
11212cab237bSDimitry Andric if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
11222cab237bSDimitry Andric nTy = VectorType::get(nTyS, vTy->getNumElements());
11232cab237bSDimitry Andric unsigned size = nTy->getScalarSizeInBits();
11242cab237bSDimitry Andric opr_n = CI->getArgOperand(1);
11252cab237bSDimitry Andric if (opr_n->getType()->isIntegerTy())
11262cab237bSDimitry Andric opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
11272cab237bSDimitry Andric else
11282cab237bSDimitry Andric opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11292cab237bSDimitry Andric
11302cab237bSDimitry Andric Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11312cab237bSDimitry Andric sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
11322cab237bSDimitry Andric nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
11332cab237bSDimitry Andric nval = B.CreateBitCast(nval, opr0->getType());
11342cab237bSDimitry Andric }
11352cab237bSDimitry Andric
11364ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
11372cab237bSDimitry Andric << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
11382cab237bSDimitry Andric replaceCall(nval);
11392cab237bSDimitry Andric
11402cab237bSDimitry Andric return true;
11412cab237bSDimitry Andric }
11422cab237bSDimitry Andric
fold_rootn(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)11432cab237bSDimitry Andric bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
11442cab237bSDimitry Andric const FuncInfo &FInfo) {
11452cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
11462cab237bSDimitry Andric Value *opr1 = CI->getArgOperand(1);
11472cab237bSDimitry Andric
11482cab237bSDimitry Andric ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
11492cab237bSDimitry Andric if (!CINT) {
11502cab237bSDimitry Andric return false;
11512cab237bSDimitry Andric }
11522cab237bSDimitry Andric int ci_opr1 = (int)CINT->getSExtValue();
11532cab237bSDimitry Andric if (ci_opr1 == 1) { // rootn(x, 1) = x
11544ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
11552cab237bSDimitry Andric replaceCall(opr0);
11562cab237bSDimitry Andric return true;
11572cab237bSDimitry Andric }
11582cab237bSDimitry Andric if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
11592cab237bSDimitry Andric std::vector<const Type*> ParamsTys;
11602cab237bSDimitry Andric ParamsTys.push_back(opr0->getType());
11612cab237bSDimitry Andric Module *M = CI->getModule();
11622cab237bSDimitry Andric if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
11632cab237bSDimitry Andric FInfo))) {
11644ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
11652cab237bSDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
11662cab237bSDimitry Andric replaceCall(nval);
11672cab237bSDimitry Andric return true;
11682cab237bSDimitry Andric }
11692cab237bSDimitry Andric } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11702cab237bSDimitry Andric Module *M = CI->getModule();
11712cab237bSDimitry Andric if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
11722cab237bSDimitry Andric FInfo))) {
11734ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
11742cab237bSDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
11752cab237bSDimitry Andric replaceCall(nval);
11762cab237bSDimitry Andric return true;
11772cab237bSDimitry Andric }
11782cab237bSDimitry Andric } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
11794ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
11802cab237bSDimitry Andric Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
11812cab237bSDimitry Andric opr0,
11822cab237bSDimitry Andric "__rootn2div");
11832cab237bSDimitry Andric replaceCall(nval);
11842cab237bSDimitry Andric return true;
11852cab237bSDimitry Andric } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
11862cab237bSDimitry Andric std::vector<const Type*> ParamsTys;
11872cab237bSDimitry Andric ParamsTys.push_back(opr0->getType());
11882cab237bSDimitry Andric Module *M = CI->getModule();
11892cab237bSDimitry Andric if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
11902cab237bSDimitry Andric FInfo))) {
11914ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
11924ba319b5SDimitry Andric << ")\n");
11932cab237bSDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
11942cab237bSDimitry Andric replaceCall(nval);
11952cab237bSDimitry Andric return true;
11962cab237bSDimitry Andric }
11972cab237bSDimitry Andric }
11982cab237bSDimitry Andric return false;
11992cab237bSDimitry Andric }
12002cab237bSDimitry Andric
fold_fma_mad(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)12012cab237bSDimitry Andric bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
12022cab237bSDimitry Andric const FuncInfo &FInfo) {
12032cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
12042cab237bSDimitry Andric Value *opr1 = CI->getArgOperand(1);
12052cab237bSDimitry Andric Value *opr2 = CI->getArgOperand(2);
12062cab237bSDimitry Andric
12072cab237bSDimitry Andric ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
12082cab237bSDimitry Andric ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
12092cab237bSDimitry Andric if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
12102cab237bSDimitry Andric // fma/mad(a, b, c) = c if a=0 || b=0
12114ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
12122cab237bSDimitry Andric replaceCall(opr2);
12132cab237bSDimitry Andric return true;
12142cab237bSDimitry Andric }
12152cab237bSDimitry Andric if (CF0 && CF0->isExactlyValue(1.0f)) {
12162cab237bSDimitry Andric // fma/mad(a, b, c) = b+c if a=1
12174ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
12184ba319b5SDimitry Andric << "\n");
12192cab237bSDimitry Andric Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
12202cab237bSDimitry Andric replaceCall(nval);
12212cab237bSDimitry Andric return true;
12222cab237bSDimitry Andric }
12232cab237bSDimitry Andric if (CF1 && CF1->isExactlyValue(1.0f)) {
12242cab237bSDimitry Andric // fma/mad(a, b, c) = a+c if b=1
12254ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
12264ba319b5SDimitry Andric << "\n");
12272cab237bSDimitry Andric Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
12282cab237bSDimitry Andric replaceCall(nval);
12292cab237bSDimitry Andric return true;
12302cab237bSDimitry Andric }
12312cab237bSDimitry Andric if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
12322cab237bSDimitry Andric if (CF->isZero()) {
12332cab237bSDimitry Andric // fma/mad(a, b, c) = a*b if c=0
12344ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
12354ba319b5SDimitry Andric << *opr1 << "\n");
12362cab237bSDimitry Andric Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
12372cab237bSDimitry Andric replaceCall(nval);
12382cab237bSDimitry Andric return true;
12392cab237bSDimitry Andric }
12402cab237bSDimitry Andric }
12412cab237bSDimitry Andric
12422cab237bSDimitry Andric return false;
12432cab237bSDimitry Andric }
12442cab237bSDimitry Andric
12452cab237bSDimitry Andric // Get a scalar native builtin signle argument FP function
getNativeFunction(Module * M,const FuncInfo & FInfo)12462cab237bSDimitry Andric Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
12472cab237bSDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
12482cab237bSDimitry Andric return nullptr;
12492cab237bSDimitry Andric FuncInfo nf = FInfo;
12502cab237bSDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE);
12512cab237bSDimitry Andric return getFunction(M, nf);
12522cab237bSDimitry Andric }
12532cab237bSDimitry Andric
12542cab237bSDimitry Andric // fold sqrt -> native_sqrt (x)
fold_sqrt(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)12552cab237bSDimitry Andric bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
12562cab237bSDimitry Andric const FuncInfo &FInfo) {
12572cab237bSDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
12582cab237bSDimitry Andric (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
12592cab237bSDimitry Andric if (Constant *FPExpr = getNativeFunction(
12602cab237bSDimitry Andric CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
12612cab237bSDimitry Andric Value *opr0 = CI->getArgOperand(0);
12624ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
12632cab237bSDimitry Andric << "sqrt(" << *opr0 << ")\n");
12642cab237bSDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
12652cab237bSDimitry Andric replaceCall(nval);
12662cab237bSDimitry Andric return true;
12672cab237bSDimitry Andric }
12682cab237bSDimitry Andric }
12692cab237bSDimitry Andric return false;
12702cab237bSDimitry Andric }
12712cab237bSDimitry Andric
12722cab237bSDimitry Andric // fold sin, cos -> sincos.
fold_sincos(CallInst * CI,IRBuilder<> & B,AliasAnalysis * AA)12732cab237bSDimitry Andric bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
12742cab237bSDimitry Andric AliasAnalysis *AA) {
12752cab237bSDimitry Andric AMDGPULibFunc fInfo;
12762cab237bSDimitry Andric if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
12772cab237bSDimitry Andric return false;
12782cab237bSDimitry Andric
12792cab237bSDimitry Andric assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
12802cab237bSDimitry Andric fInfo.getId() == AMDGPULibFunc::EI_COS);
12812cab237bSDimitry Andric bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
12822cab237bSDimitry Andric
12832cab237bSDimitry Andric Value *CArgVal = CI->getArgOperand(0);
12842cab237bSDimitry Andric BasicBlock * const CBB = CI->getParent();
12852cab237bSDimitry Andric
12862cab237bSDimitry Andric int const MaxScan = 30;
12872cab237bSDimitry Andric
12882cab237bSDimitry Andric { // fold in load value.
12892cab237bSDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
12902cab237bSDimitry Andric if (LI && LI->getParent() == CBB) {
12912cab237bSDimitry Andric BasicBlock::iterator BBI = LI->getIterator();
12922cab237bSDimitry Andric Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
12932cab237bSDimitry Andric if (AvailableVal) {
12942cab237bSDimitry Andric CArgVal->replaceAllUsesWith(AvailableVal);
12952cab237bSDimitry Andric if (CArgVal->getNumUses() == 0)
12962cab237bSDimitry Andric LI->eraseFromParent();
12972cab237bSDimitry Andric CArgVal = CI->getArgOperand(0);
12982cab237bSDimitry Andric }
12992cab237bSDimitry Andric }
13002cab237bSDimitry Andric }
13012cab237bSDimitry Andric
13022cab237bSDimitry Andric Module *M = CI->getModule();
13032cab237bSDimitry Andric fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
13042cab237bSDimitry Andric std::string const PairName = fInfo.mangle();
13052cab237bSDimitry Andric
13062cab237bSDimitry Andric CallInst *UI = nullptr;
13072cab237bSDimitry Andric for (User* U : CArgVal->users()) {
13082cab237bSDimitry Andric CallInst *XI = dyn_cast_or_null<CallInst>(U);
13092cab237bSDimitry Andric if (!XI || XI == CI || XI->getParent() != CBB)
13102cab237bSDimitry Andric continue;
13112cab237bSDimitry Andric
13122cab237bSDimitry Andric Function *UCallee = XI->getCalledFunction();
13132cab237bSDimitry Andric if (!UCallee || !UCallee->getName().equals(PairName))
13142cab237bSDimitry Andric continue;
13152cab237bSDimitry Andric
13162cab237bSDimitry Andric BasicBlock::iterator BBI = CI->getIterator();
13172cab237bSDimitry Andric if (BBI == CI->getParent()->begin())
13182cab237bSDimitry Andric break;
13192cab237bSDimitry Andric --BBI;
13202cab237bSDimitry Andric for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
13212cab237bSDimitry Andric if (cast<Instruction>(BBI) == XI) {
13222cab237bSDimitry Andric UI = XI;
13232cab237bSDimitry Andric break;
13242cab237bSDimitry Andric }
13252cab237bSDimitry Andric }
13262cab237bSDimitry Andric if (UI) break;
13272cab237bSDimitry Andric }
13282cab237bSDimitry Andric
13292cab237bSDimitry Andric if (!UI) return false;
13302cab237bSDimitry Andric
13312cab237bSDimitry Andric // Merge the sin and cos.
13322cab237bSDimitry Andric
13332cab237bSDimitry Andric // for OpenCL 2.0 we have only generic implementation of sincos
13342cab237bSDimitry Andric // function.
13352cab237bSDimitry Andric AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1336*b5893f02SDimitry Andric nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
13372cab237bSDimitry Andric Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
13382cab237bSDimitry Andric if (!Fsincos) return false;
13392cab237bSDimitry Andric
13402cab237bSDimitry Andric BasicBlock::iterator ItOld = B.GetInsertPoint();
13412cab237bSDimitry Andric AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
13422cab237bSDimitry Andric B.SetInsertPoint(UI);
13432cab237bSDimitry Andric
13442cab237bSDimitry Andric Value *P = Alloc;
13452cab237bSDimitry Andric Type *PTy = Fsincos->getFunctionType()->getParamType(1);
13462cab237bSDimitry Andric // The allocaInst allocates the memory in private address space. This need
13472cab237bSDimitry Andric // to be bitcasted to point to the address space of cos pointer type.
13482cab237bSDimitry Andric // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1349*b5893f02SDimitry Andric if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
13502cab237bSDimitry Andric P = B.CreateAddrSpaceCast(Alloc, PTy);
13512cab237bSDimitry Andric CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
13522cab237bSDimitry Andric
13534ba319b5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
13544ba319b5SDimitry Andric << *Call << "\n");
13552cab237bSDimitry Andric
13562cab237bSDimitry Andric if (!isSin) { // CI->cos, UI->sin
13572cab237bSDimitry Andric B.SetInsertPoint(&*ItOld);
13582cab237bSDimitry Andric UI->replaceAllUsesWith(&*Call);
13592cab237bSDimitry Andric Instruction *Reload = B.CreateLoad(Alloc);
13602cab237bSDimitry Andric CI->replaceAllUsesWith(Reload);
13612cab237bSDimitry Andric UI->eraseFromParent();
13622cab237bSDimitry Andric CI->eraseFromParent();
13632cab237bSDimitry Andric } else { // CI->sin, UI->cos
13642cab237bSDimitry Andric Instruction *Reload = B.CreateLoad(Alloc);
13652cab237bSDimitry Andric UI->replaceAllUsesWith(Reload);
13662cab237bSDimitry Andric CI->replaceAllUsesWith(Call);
13672cab237bSDimitry Andric UI->eraseFromParent();
13682cab237bSDimitry Andric CI->eraseFromParent();
13692cab237bSDimitry Andric }
13702cab237bSDimitry Andric return true;
13712cab237bSDimitry Andric }
13722cab237bSDimitry Andric
13732cab237bSDimitry Andric // Get insertion point at entry.
getEntryIns(CallInst * UI)13742cab237bSDimitry Andric BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
13752cab237bSDimitry Andric Function * Func = UI->getParent()->getParent();
13762cab237bSDimitry Andric BasicBlock * BB = &Func->getEntryBlock();
13772cab237bSDimitry Andric assert(BB && "Entry block not found!");
13782cab237bSDimitry Andric BasicBlock::iterator ItNew = BB->begin();
13792cab237bSDimitry Andric return ItNew;
13802cab237bSDimitry Andric }
13812cab237bSDimitry Andric
13822cab237bSDimitry Andric // Insert a AllocsInst at the beginning of function entry block.
insertAlloca(CallInst * UI,IRBuilder<> & B,const char * prefix)13832cab237bSDimitry Andric AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
13842cab237bSDimitry Andric const char *prefix) {
13852cab237bSDimitry Andric BasicBlock::iterator ItNew = getEntryIns(UI);
13862cab237bSDimitry Andric Function *UCallee = UI->getCalledFunction();
13872cab237bSDimitry Andric Type *RetType = UCallee->getReturnType();
13882cab237bSDimitry Andric B.SetInsertPoint(&*ItNew);
13892cab237bSDimitry Andric AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
13902cab237bSDimitry Andric std::string(prefix) + UI->getName());
13912cab237bSDimitry Andric Alloc->setAlignment(UCallee->getParent()->getDataLayout()
13922cab237bSDimitry Andric .getTypeAllocSize(RetType));
13932cab237bSDimitry Andric return Alloc;
13942cab237bSDimitry Andric }
13952cab237bSDimitry Andric
evaluateScalarMathFunc(FuncInfo & FInfo,double & Res0,double & Res1,Constant * copr0,Constant * copr1,Constant * copr2)13962cab237bSDimitry Andric bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
13972cab237bSDimitry Andric double& Res0, double& Res1,
13982cab237bSDimitry Andric Constant *copr0, Constant *copr1,
13992cab237bSDimitry Andric Constant *copr2) {
14002cab237bSDimitry Andric // By default, opr0/opr1/opr3 holds values of float/double type.
14012cab237bSDimitry Andric // If they are not float/double, each function has to its
14022cab237bSDimitry Andric // operand separately.
14032cab237bSDimitry Andric double opr0=0.0, opr1=0.0, opr2=0.0;
14042cab237bSDimitry Andric ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
14052cab237bSDimitry Andric ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
14062cab237bSDimitry Andric ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
14072cab237bSDimitry Andric if (fpopr0) {
14082cab237bSDimitry Andric opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14092cab237bSDimitry Andric ? fpopr0->getValueAPF().convertToDouble()
14102cab237bSDimitry Andric : (double)fpopr0->getValueAPF().convertToFloat();
14112cab237bSDimitry Andric }
14122cab237bSDimitry Andric
14132cab237bSDimitry Andric if (fpopr1) {
14142cab237bSDimitry Andric opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14152cab237bSDimitry Andric ? fpopr1->getValueAPF().convertToDouble()
14162cab237bSDimitry Andric : (double)fpopr1->getValueAPF().convertToFloat();
14172cab237bSDimitry Andric }
14182cab237bSDimitry Andric
14192cab237bSDimitry Andric if (fpopr2) {
14202cab237bSDimitry Andric opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14212cab237bSDimitry Andric ? fpopr2->getValueAPF().convertToDouble()
14222cab237bSDimitry Andric : (double)fpopr2->getValueAPF().convertToFloat();
14232cab237bSDimitry Andric }
14242cab237bSDimitry Andric
14252cab237bSDimitry Andric switch (FInfo.getId()) {
14262cab237bSDimitry Andric default : return false;
14272cab237bSDimitry Andric
14282cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOS:
14292cab237bSDimitry Andric Res0 = acos(opr0);
14302cab237bSDimitry Andric return true;
14312cab237bSDimitry Andric
14322cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOSH:
14332cab237bSDimitry Andric // acosh(x) == log(x + sqrt(x*x - 1))
14342cab237bSDimitry Andric Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14352cab237bSDimitry Andric return true;
14362cab237bSDimitry Andric
14372cab237bSDimitry Andric case AMDGPULibFunc::EI_ACOSPI:
14382cab237bSDimitry Andric Res0 = acos(opr0) / MATH_PI;
14392cab237bSDimitry Andric return true;
14402cab237bSDimitry Andric
14412cab237bSDimitry Andric case AMDGPULibFunc::EI_ASIN:
14422cab237bSDimitry Andric Res0 = asin(opr0);
14432cab237bSDimitry Andric return true;
14442cab237bSDimitry Andric
14452cab237bSDimitry Andric case AMDGPULibFunc::EI_ASINH:
14462cab237bSDimitry Andric // asinh(x) == log(x + sqrt(x*x + 1))
14472cab237bSDimitry Andric Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
14482cab237bSDimitry Andric return true;
14492cab237bSDimitry Andric
14502cab237bSDimitry Andric case AMDGPULibFunc::EI_ASINPI:
14512cab237bSDimitry Andric Res0 = asin(opr0) / MATH_PI;
14522cab237bSDimitry Andric return true;
14532cab237bSDimitry Andric
14542cab237bSDimitry Andric case AMDGPULibFunc::EI_ATAN:
14552cab237bSDimitry Andric Res0 = atan(opr0);
14562cab237bSDimitry Andric return true;
14572cab237bSDimitry Andric
14582cab237bSDimitry Andric case AMDGPULibFunc::EI_ATANH:
14592cab237bSDimitry Andric // atanh(x) == (log(x+1) - log(x-1))/2;
14602cab237bSDimitry Andric Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
14612cab237bSDimitry Andric return true;
14622cab237bSDimitry Andric
14632cab237bSDimitry Andric case AMDGPULibFunc::EI_ATANPI:
14642cab237bSDimitry Andric Res0 = atan(opr0) / MATH_PI;
14652cab237bSDimitry Andric return true;
14662cab237bSDimitry Andric
14672cab237bSDimitry Andric case AMDGPULibFunc::EI_CBRT:
14682cab237bSDimitry Andric Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
14692cab237bSDimitry Andric return true;
14702cab237bSDimitry Andric
14712cab237bSDimitry Andric case AMDGPULibFunc::EI_COS:
14722cab237bSDimitry Andric Res0 = cos(opr0);
14732cab237bSDimitry Andric return true;
14742cab237bSDimitry Andric
14752cab237bSDimitry Andric case AMDGPULibFunc::EI_COSH:
14762cab237bSDimitry Andric Res0 = cosh(opr0);
14772cab237bSDimitry Andric return true;
14782cab237bSDimitry Andric
14792cab237bSDimitry Andric case AMDGPULibFunc::EI_COSPI:
14802cab237bSDimitry Andric Res0 = cos(MATH_PI * opr0);
14812cab237bSDimitry Andric return true;
14822cab237bSDimitry Andric
14832cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP:
14842cab237bSDimitry Andric Res0 = exp(opr0);
14852cab237bSDimitry Andric return true;
14862cab237bSDimitry Andric
14872cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP2:
14882cab237bSDimitry Andric Res0 = pow(2.0, opr0);
14892cab237bSDimitry Andric return true;
14902cab237bSDimitry Andric
14912cab237bSDimitry Andric case AMDGPULibFunc::EI_EXP10:
14922cab237bSDimitry Andric Res0 = pow(10.0, opr0);
14932cab237bSDimitry Andric return true;
14942cab237bSDimitry Andric
14952cab237bSDimitry Andric case AMDGPULibFunc::EI_EXPM1:
14962cab237bSDimitry Andric Res0 = exp(opr0) - 1.0;
14972cab237bSDimitry Andric return true;
14982cab237bSDimitry Andric
14992cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG:
15002cab237bSDimitry Andric Res0 = log(opr0);
15012cab237bSDimitry Andric return true;
15022cab237bSDimitry Andric
15032cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG2:
15042cab237bSDimitry Andric Res0 = log(opr0) / log(2.0);
15052cab237bSDimitry Andric return true;
15062cab237bSDimitry Andric
15072cab237bSDimitry Andric case AMDGPULibFunc::EI_LOG10:
15082cab237bSDimitry Andric Res0 = log(opr0) / log(10.0);
15092cab237bSDimitry Andric return true;
15102cab237bSDimitry Andric
15112cab237bSDimitry Andric case AMDGPULibFunc::EI_RSQRT:
15122cab237bSDimitry Andric Res0 = 1.0 / sqrt(opr0);
15132cab237bSDimitry Andric return true;
15142cab237bSDimitry Andric
15152cab237bSDimitry Andric case AMDGPULibFunc::EI_SIN:
15162cab237bSDimitry Andric Res0 = sin(opr0);
15172cab237bSDimitry Andric return true;
15182cab237bSDimitry Andric
15192cab237bSDimitry Andric case AMDGPULibFunc::EI_SINH:
15202cab237bSDimitry Andric Res0 = sinh(opr0);
15212cab237bSDimitry Andric return true;
15222cab237bSDimitry Andric
15232cab237bSDimitry Andric case AMDGPULibFunc::EI_SINPI:
15242cab237bSDimitry Andric Res0 = sin(MATH_PI * opr0);
15252cab237bSDimitry Andric return true;
15262cab237bSDimitry Andric
15272cab237bSDimitry Andric case AMDGPULibFunc::EI_SQRT:
15282cab237bSDimitry Andric Res0 = sqrt(opr0);
15292cab237bSDimitry Andric return true;
15302cab237bSDimitry Andric
15312cab237bSDimitry Andric case AMDGPULibFunc::EI_TAN:
15322cab237bSDimitry Andric Res0 = tan(opr0);
15332cab237bSDimitry Andric return true;
15342cab237bSDimitry Andric
15352cab237bSDimitry Andric case AMDGPULibFunc::EI_TANH:
15362cab237bSDimitry Andric Res0 = tanh(opr0);
15372cab237bSDimitry Andric return true;
15382cab237bSDimitry Andric
15392cab237bSDimitry Andric case AMDGPULibFunc::EI_TANPI:
15402cab237bSDimitry Andric Res0 = tan(MATH_PI * opr0);
15412cab237bSDimitry Andric return true;
15422cab237bSDimitry Andric
15432cab237bSDimitry Andric case AMDGPULibFunc::EI_RECIP:
15442cab237bSDimitry Andric Res0 = 1.0 / opr0;
15452cab237bSDimitry Andric return true;
15462cab237bSDimitry Andric
15472cab237bSDimitry Andric // two-arg functions
15482cab237bSDimitry Andric case AMDGPULibFunc::EI_DIVIDE:
15492cab237bSDimitry Andric Res0 = opr0 / opr1;
15502cab237bSDimitry Andric return true;
15512cab237bSDimitry Andric
15522cab237bSDimitry Andric case AMDGPULibFunc::EI_POW:
15532cab237bSDimitry Andric case AMDGPULibFunc::EI_POWR:
15542cab237bSDimitry Andric Res0 = pow(opr0, opr1);
15552cab237bSDimitry Andric return true;
15562cab237bSDimitry Andric
15572cab237bSDimitry Andric case AMDGPULibFunc::EI_POWN: {
15582cab237bSDimitry Andric if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15592cab237bSDimitry Andric double val = (double)iopr1->getSExtValue();
15602cab237bSDimitry Andric Res0 = pow(opr0, val);
15612cab237bSDimitry Andric return true;
15622cab237bSDimitry Andric }
15632cab237bSDimitry Andric return false;
15642cab237bSDimitry Andric }
15652cab237bSDimitry Andric
15662cab237bSDimitry Andric case AMDGPULibFunc::EI_ROOTN: {
15672cab237bSDimitry Andric if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15682cab237bSDimitry Andric double val = (double)iopr1->getSExtValue();
15692cab237bSDimitry Andric Res0 = pow(opr0, 1.0 / val);
15702cab237bSDimitry Andric return true;
15712cab237bSDimitry Andric }
15722cab237bSDimitry Andric return false;
15732cab237bSDimitry Andric }
15742cab237bSDimitry Andric
15752cab237bSDimitry Andric // with ptr arg
15762cab237bSDimitry Andric case AMDGPULibFunc::EI_SINCOS:
15772cab237bSDimitry Andric Res0 = sin(opr0);
15782cab237bSDimitry Andric Res1 = cos(opr0);
15792cab237bSDimitry Andric return true;
15802cab237bSDimitry Andric
15812cab237bSDimitry Andric // three-arg functions
15822cab237bSDimitry Andric case AMDGPULibFunc::EI_FMA:
15832cab237bSDimitry Andric case AMDGPULibFunc::EI_MAD:
15842cab237bSDimitry Andric Res0 = opr0 * opr1 + opr2;
15852cab237bSDimitry Andric return true;
15862cab237bSDimitry Andric }
15872cab237bSDimitry Andric
15882cab237bSDimitry Andric return false;
15892cab237bSDimitry Andric }
15902cab237bSDimitry Andric
evaluateCall(CallInst * aCI,FuncInfo & FInfo)15912cab237bSDimitry Andric bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
15922cab237bSDimitry Andric int numArgs = (int)aCI->getNumArgOperands();
15932cab237bSDimitry Andric if (numArgs > 3)
15942cab237bSDimitry Andric return false;
15952cab237bSDimitry Andric
15962cab237bSDimitry Andric Constant *copr0 = nullptr;
15972cab237bSDimitry Andric Constant *copr1 = nullptr;
15982cab237bSDimitry Andric Constant *copr2 = nullptr;
15992cab237bSDimitry Andric if (numArgs > 0) {
16002cab237bSDimitry Andric if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
16012cab237bSDimitry Andric return false;
16022cab237bSDimitry Andric }
16032cab237bSDimitry Andric
16042cab237bSDimitry Andric if (numArgs > 1) {
16052cab237bSDimitry Andric if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
16062cab237bSDimitry Andric if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
16072cab237bSDimitry Andric return false;
16082cab237bSDimitry Andric }
16092cab237bSDimitry Andric }
16102cab237bSDimitry Andric
16112cab237bSDimitry Andric if (numArgs > 2) {
16122cab237bSDimitry Andric if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
16132cab237bSDimitry Andric return false;
16142cab237bSDimitry Andric }
16152cab237bSDimitry Andric
16162cab237bSDimitry Andric // At this point, all arguments to aCI are constants.
16172cab237bSDimitry Andric
16182cab237bSDimitry Andric // max vector size is 16, and sincos will generate two results.
16192cab237bSDimitry Andric double DVal0[16], DVal1[16];
16202cab237bSDimitry Andric bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
16212cab237bSDimitry Andric if (getVecSize(FInfo) == 1) {
16222cab237bSDimitry Andric if (!evaluateScalarMathFunc(FInfo, DVal0[0],
16232cab237bSDimitry Andric DVal1[0], copr0, copr1, copr2)) {
16242cab237bSDimitry Andric return false;
16252cab237bSDimitry Andric }
16262cab237bSDimitry Andric } else {
16272cab237bSDimitry Andric ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
16282cab237bSDimitry Andric ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
16292cab237bSDimitry Andric ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
16302cab237bSDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i) {
16312cab237bSDimitry Andric Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16322cab237bSDimitry Andric Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16332cab237bSDimitry Andric Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
16342cab237bSDimitry Andric if (!evaluateScalarMathFunc(FInfo, DVal0[i],
16352cab237bSDimitry Andric DVal1[i], celt0, celt1, celt2)) {
16362cab237bSDimitry Andric return false;
16372cab237bSDimitry Andric }
16382cab237bSDimitry Andric }
16392cab237bSDimitry Andric }
16402cab237bSDimitry Andric
16412cab237bSDimitry Andric LLVMContext &context = CI->getParent()->getParent()->getContext();
16422cab237bSDimitry Andric Constant *nval0, *nval1;
16432cab237bSDimitry Andric if (getVecSize(FInfo) == 1) {
16442cab237bSDimitry Andric nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
16452cab237bSDimitry Andric if (hasTwoResults)
16462cab237bSDimitry Andric nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
16472cab237bSDimitry Andric } else {
16482cab237bSDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16492cab237bSDimitry Andric SmallVector <float, 0> FVal0, FVal1;
16502cab237bSDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i)
16512cab237bSDimitry Andric FVal0.push_back((float)DVal0[i]);
16522cab237bSDimitry Andric ArrayRef<float> tmp0(FVal0);
16532cab237bSDimitry Andric nval0 = ConstantDataVector::get(context, tmp0);
16542cab237bSDimitry Andric if (hasTwoResults) {
16552cab237bSDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i)
16562cab237bSDimitry Andric FVal1.push_back((float)DVal1[i]);
16572cab237bSDimitry Andric ArrayRef<float> tmp1(FVal1);
16582cab237bSDimitry Andric nval1 = ConstantDataVector::get(context, tmp1);
16592cab237bSDimitry Andric }
16602cab237bSDimitry Andric } else {
16612cab237bSDimitry Andric ArrayRef<double> tmp0(DVal0);
16622cab237bSDimitry Andric nval0 = ConstantDataVector::get(context, tmp0);
16632cab237bSDimitry Andric if (hasTwoResults) {
16642cab237bSDimitry Andric ArrayRef<double> tmp1(DVal1);
16652cab237bSDimitry Andric nval1 = ConstantDataVector::get(context, tmp1);
16662cab237bSDimitry Andric }
16672cab237bSDimitry Andric }
16682cab237bSDimitry Andric }
16692cab237bSDimitry Andric
16702cab237bSDimitry Andric if (hasTwoResults) {
16712cab237bSDimitry Andric // sincos
16722cab237bSDimitry Andric assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16732cab237bSDimitry Andric "math function with ptr arg not supported yet");
16742cab237bSDimitry Andric new StoreInst(nval1, aCI->getArgOperand(1), aCI);
16752cab237bSDimitry Andric }
16762cab237bSDimitry Andric
16772cab237bSDimitry Andric replaceCall(nval0);
16782cab237bSDimitry Andric return true;
16792cab237bSDimitry Andric }
16802cab237bSDimitry Andric
16812cab237bSDimitry Andric // Public interface to the Simplify LibCalls pass.
createAMDGPUSimplifyLibCallsPass(const TargetOptions & Opt)16822cab237bSDimitry Andric FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
16832cab237bSDimitry Andric return new AMDGPUSimplifyLibCalls(Opt);
16842cab237bSDimitry Andric }
16852cab237bSDimitry Andric
createAMDGPUUseNativeCallsPass()16862cab237bSDimitry Andric FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
16872cab237bSDimitry Andric return new AMDGPUUseNativeCalls();
16882cab237bSDimitry Andric }
16892cab237bSDimitry Andric
setFastFlags(Function & F,const TargetOptions & Options)16902cab237bSDimitry Andric static bool setFastFlags(Function &F, const TargetOptions &Options) {
16912cab237bSDimitry Andric AttrBuilder B;
16922cab237bSDimitry Andric
16932cab237bSDimitry Andric if (Options.UnsafeFPMath || Options.NoInfsFPMath)
16942cab237bSDimitry Andric B.addAttribute("no-infs-fp-math", "true");
16952cab237bSDimitry Andric if (Options.UnsafeFPMath || Options.NoNaNsFPMath)
16962cab237bSDimitry Andric B.addAttribute("no-nans-fp-math", "true");
16972cab237bSDimitry Andric if (Options.UnsafeFPMath) {
16982cab237bSDimitry Andric B.addAttribute("less-precise-fpmad", "true");
16992cab237bSDimitry Andric B.addAttribute("unsafe-fp-math", "true");
17002cab237bSDimitry Andric }
17012cab237bSDimitry Andric
17022cab237bSDimitry Andric if (!B.hasAttributes())
17032cab237bSDimitry Andric return false;
17042cab237bSDimitry Andric
17052cab237bSDimitry Andric F.addAttributes(AttributeList::FunctionIndex, B);
17062cab237bSDimitry Andric
17072cab237bSDimitry Andric return true;
17082cab237bSDimitry Andric }
17092cab237bSDimitry Andric
runOnFunction(Function & F)17102cab237bSDimitry Andric bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
17112cab237bSDimitry Andric if (skipFunction(F))
17122cab237bSDimitry Andric return false;
17132cab237bSDimitry Andric
17142cab237bSDimitry Andric bool Changed = false;
17152cab237bSDimitry Andric auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
17162cab237bSDimitry Andric
17174ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "AMDIC: process function ";
17184ba319b5SDimitry Andric F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
17192cab237bSDimitry Andric
17202cab237bSDimitry Andric if (!EnablePreLink)
17212cab237bSDimitry Andric Changed |= setFastFlags(F, Options);
17222cab237bSDimitry Andric
17232cab237bSDimitry Andric for (auto &BB : F) {
17242cab237bSDimitry Andric for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17252cab237bSDimitry Andric // Ignore non-calls.
17262cab237bSDimitry Andric CallInst *CI = dyn_cast<CallInst>(I);
17272cab237bSDimitry Andric ++I;
17282cab237bSDimitry Andric if (!CI) continue;
17292cab237bSDimitry Andric
17302cab237bSDimitry Andric // Ignore indirect calls.
17312cab237bSDimitry Andric Function *Callee = CI->getCalledFunction();
17322cab237bSDimitry Andric if (Callee == 0) continue;
17332cab237bSDimitry Andric
17344ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
17352cab237bSDimitry Andric dbgs().flush());
17362cab237bSDimitry Andric if(Simplifier.fold(CI, AA))
17372cab237bSDimitry Andric Changed = true;
17382cab237bSDimitry Andric }
17392cab237bSDimitry Andric }
17402cab237bSDimitry Andric return Changed;
17412cab237bSDimitry Andric }
17422cab237bSDimitry Andric
runOnFunction(Function & F)17432cab237bSDimitry Andric bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
17442cab237bSDimitry Andric if (skipFunction(F) || UseNative.empty())
17452cab237bSDimitry Andric return false;
17462cab237bSDimitry Andric
17472cab237bSDimitry Andric bool Changed = false;
17482cab237bSDimitry Andric for (auto &BB : F) {
17492cab237bSDimitry Andric for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
17502cab237bSDimitry Andric // Ignore non-calls.
17512cab237bSDimitry Andric CallInst *CI = dyn_cast<CallInst>(I);
17522cab237bSDimitry Andric ++I;
17532cab237bSDimitry Andric if (!CI) continue;
17542cab237bSDimitry Andric
17552cab237bSDimitry Andric // Ignore indirect calls.
17562cab237bSDimitry Andric Function *Callee = CI->getCalledFunction();
17572cab237bSDimitry Andric if (Callee == 0) continue;
17582cab237bSDimitry Andric
17592cab237bSDimitry Andric if(Simplifier.useNative(CI))
17602cab237bSDimitry Andric Changed = true;
17612cab237bSDimitry Andric }
17622cab237bSDimitry Andric }
17632cab237bSDimitry Andric return Changed;
17642cab237bSDimitry Andric }
1765