//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <iterator>

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

static cl::opt<bool> WidenLoads(
  "amdgpu-codegenprepare-widen-constant-loads",
  cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
  cl::ReallyHidden,
  cl::init(true));

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const GCNSubtarget *ST = nullptr;
  AssumptionCache *AC = nullptr;
  LegacyDivergenceAnalysis *DA = nullptr;
  Module *Mod = nullptr;
  bool HasUnsafeFPMath = false;

  /// \returns \p T's base element bit width.
  unsigned getBaseElementBitWidth(const Type *T) const;

  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
  /// is returned.
  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;

  /// \returns True if binary operation \p I is a signed binary operation, false
  /// otherwise.
  bool isSigned(const BinaryOperator &I) const;

  /// \returns True if the condition of 'select' operation \p I comes from a
  /// signed 'icmp' operation, false otherwise.
  bool isSigned(const SelectInst &I) const;

  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
  /// false otherwise.
  bool needsPromotionToI32(const Type *T) const;

  /// Promotes uniform binary operation \p I to equivalent 32 bit binary
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with equivalent 32 bit binary operation, and
  /// truncating the result of 32 bit binary operation back to \p I's original
  /// type. Division operation is not promoted.
  ///
  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
  /// false otherwise.
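  ///
  /// For illustration only (added example; the value names are invented), a
  /// uniform 16 bit addition
  ///   %r = add i16 %a, %b
  /// would be rewritten along the lines of
  ///   %a32 = zext i16 %a to i32
  ///   %b32 = zext i16 %b to i32
  ///   %r32 = add nuw nsw i32 %a32, %b32
  ///   %r   = trunc i32 %r32 to i16
  /// with sext used instead of zext for the signed opcodes.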
  bool promoteUniformOpToI32(BinaryOperator &I) const;

  /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, and replacing \p I with 32 bit 'icmp' operation.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(ICmpInst &I) const;

  /// Promotes uniform 'select' operation \p I to 32 bit 'select'
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with 32 bit 'select' operation, and truncating
  /// the result of 32 bit 'select' operation back to \p I's original type.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(SelectInst &I) const;

  /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
  /// intrinsic.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by zero extending the operand to 32
  /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
  /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
  /// shift amount is 32 minus \p I's base element bit width), and truncating
  /// the result of the shift operation back to \p I's original type.
  ///
  /// \returns True.
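  ///
  /// For illustration only (added example; value names are invented), a
  /// uniform i16 bitreverse
  ///   %r = call i16 @llvm.bitreverse.i16(i16 %x)
  /// would become roughly
  ///   %x32 = zext i16 %x to i32
  ///   %r32 = call i32 @llvm.bitreverse.i32(i32 %x32)
  ///   %s   = lshr i32 %r32, 16
  ///   %r   = trunc i32 %s to i16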
  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;

  /// Expands 24 bit div or rem.
  Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
                        Value *Num, Value *Den,
                        bool IsDiv, bool IsSigned) const;

  /// Expands 32 bit div or rem.
  Value* expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I,
                        Value *Num, Value *Den) const;

  /// Widen a scalar load.
  ///
  /// \details Widens a uniform, small-type load from constant memory to a full
  /// 32 bits and then truncates the result, so that a scalar load can be
  /// selected instead of a vector load.
  ///
  /// \returns True if the load can be widened.
  bool canWidenScalarExtLoad(LoadInst &I) const;

public:
  static char ID;

  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoadInst(LoadInst &I);
  bool visitICmpInst(ICmpInst &I);
  bool visitSelectInst(SelectInst &I);

  bool visitIntrinsicInst(IntrinsicInst &I);
  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return T->getIntegerBitWidth();
  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}

Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return B.getInt32Ty();
  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
}

bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
      I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) ?
      cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}

bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
    return true;

  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
    // TODO: The set of packed operations is more limited, so may want to
    // promote some anyway.
    if (ST->hasVOP3PInsts())
      return false;

    return needsPromotionToI32(VT->getElementType());
  }

  return false;
}

// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Sub:
    return true;
  case Instruction::Mul:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Return true if the op promoted to i32 should have nuw set.
static bool promotedOpIsNUW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Mul:
    return true;
  case Instruction::Sub:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}
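
// Informal note on the two helpers above (added for clarity): the promoted
// operands are zero extended from at most 16 bits, so in 32 bits an add or
// shl of such values can wrap neither signed nor unsigned, a mul cannot wrap
// unsigned but may exceed INT32_MAX unless the original mul was already nuw,
// and a sub cannot wrap signed but may go below zero unless the original sub
// was already nuw.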

bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv ||
      I.getOpcode() == Instruction::SRem ||
      I.getOpcode() == Instruction::URem)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(cast<Instruction>(I)))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(cast<Instruction>(I)))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *NewICmp = nullptr;

  if (I.isSigned()) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);

  I.replaceAllUsesWith(NewICmp);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp1 = nullptr;
  Value *ExtOp2 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
  } else {
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
  }
  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return HasDenormals;

  if (UnsafeDiv)
    return true;

  bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);

  // Reciprocal f32 is handled separately without denormals.
  return HasDenormals ^ IsOne;
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
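//
// For example (illustrative only; value names invented), an fdiv tagged with
// !fpmath metadata allowing 2.5 ulp such as
//   %d = fdiv float %x, %y, !fpmath !0    ; !0 = !{float 2.500000e+00}
// is replaced with
//   %d = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)
// unless shouldKeepFDivF32() decides the plain fdiv is preferable.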
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
                   FMF.allowReciprocal();

  // With UnsafeDiv the node will be optimized to just rcp and mul.
  if (UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  bool HasDenormals = ST->hasFP32Denormals();
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is partially
    // constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return !!NewFDiv;
}

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

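// Note added for clarity: getMul64 zero-extends both operands to i64,
// multiplies, and returns the {low, high} 32-bit halves of the 64-bit
// product; getMulHu returns just the high half, i.e. an unsigned mulhi.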
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
                                          Value *LHS, Value *RHS) {
  Type *I32Ty = Builder.getInt32Ty();
  Type *I64Ty = Builder.getInt64Ty();

  Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
  Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
  Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
  Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
  Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
  Hi = Builder.CreateTrunc(Hi, I32Ty);
  return std::make_pair(Lo, Hi);
}

static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
  return getMul64(Builder, LHS, RHS).second;
}

// The fractional part of a float is enough to accurately represent up to
// a 24-bit signed integer.
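//
// (Added note: an IEEE single has a 24-bit significand, 23 stored bits plus
// the implicit leading one, so every integer of magnitude below 2^24 is exact.
// Requiring at least 9 sign bits on each 32-bit operand below guarantees the
// values fit in that range.)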
Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
                                            BinaryOperator &I,
                                            Value *Num, Value *Den,
                                            bool IsDiv, bool IsSigned) const {
  assert(Num->getType()->isIntegerTy(32));

  const DataLayout &DL = Mod->getDataLayout();
  unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
  if (LHSSignBits < 9)
    return nullptr;

  unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
  if (RHSSignBits < 9)
    return nullptr;

  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  unsigned DivBits = 32 - SignBits;
  if (IsSigned)
    ++DivBits;

  Type *Ty = Num->getType();
  Type *I32Ty = Builder.getInt32Ty();
  Type *F32Ty = Builder.getFloatTy();
  ConstantInt *One = Builder.getInt32(1);
  Value *JQ = One;

  if (IsSigned) {
    // char|short jq = ia ^ ib;
    JQ = Builder.CreateXor(Num, Den);

    // jq = jq >> (bitsize - 2)
    JQ = Builder.CreateAShr(JQ, Builder.getInt32(30));

    // jq = jq | 0x1
    JQ = Builder.CreateOr(JQ, One);
  }

  // int ia = (int)LHS;
  Value *IA = Num;

  // int ib = (int)RHS;
  Value *IB = Den;

  // float fa = (float)ia;
  Value *FA = IsSigned ? Builder.CreateSIToFP(IA, F32Ty)
                       : Builder.CreateUIToFP(IA, F32Ty);

  // float fb = (float)ib;
  Value *FB = IsSigned ? Builder.CreateSIToFP(IB, F32Ty)
                       : Builder.CreateUIToFP(IB, F32Ty);

  Value *RCP = Builder.CreateFDiv(ConstantFP::get(F32Ty, 1.0), FB);
  Value *FQM = Builder.CreateFMul(FA, RCP);

  // fq = trunc(fqm);
  CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
  FQ->copyFastMathFlags(Builder.getFastMathFlags());

  // float fqneg = -fq;
  Value *FQNeg = Builder.CreateFNeg(FQ);

  // float fr = mad(fqneg, fb, fa);
  Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz,
                                      {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);

  // int iq = (int)fq;
  Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
                       : Builder.CreateFPToUI(FQ, I32Ty);

  // fr = fabs(fr);
  FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);

  // fb = fabs(fb);
  FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);

  // int cv = fr >= fb;
  Value *CV = Builder.CreateFCmpOGE(FR, FB);

  // jq = (cv ? jq : 0);
  JQ = Builder.CreateSelect(CV, JQ, Builder.getInt32(0));

  // dst = iq + jq;
  Value *Div = Builder.CreateAdd(IQ, JQ);

  Value *Res = Div;
  if (!IsDiv) {
    // Rem needs compensation, it's easier to recompute it
    Value *Rem = Builder.CreateMul(Div, Den);
    Res = Builder.CreateSub(Num, Rem);
  }

  // Truncate to number of bits this divide really is.
  if (IsSigned) {
    Res = Builder.CreateTrunc(Res, Builder.getIntNTy(DivBits));
    Res = Builder.CreateSExt(Res, Ty);
  } else {
    ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
    Res = Builder.CreateAnd(Res, TruncMask);
  }

  return Res;
}

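// Overview (comment added for clarity): the expansion below is the usual
// reciprocal-based unsigned division. It approximates RCP ~= 2^32 / Den with
// an f32 reciprocal, estimates the rounding error with 32x32->64 multiplies,
// adjusts RCP by that error, computes Quotient = mulhu(RCP, Num), and then
// applies at most a +/-1 (or +/-Den for rem) correction. Signed inputs are
// reduced to unsigned by taking absolute values first and restoring the sign
// at the end.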
Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
                                            BinaryOperator &I,
                                            Value *Num, Value *Den) const {
  Instruction::BinaryOps Opc = I.getOpcode();
  assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
         Opc == Instruction::SRem || Opc == Instruction::SDiv);

  FastMathFlags FMF;
  FMF.setFast();
  Builder.setFastMathFlags(FMF);

  if (isa<Constant>(Den))
    return nullptr; // Keep it for optimization

  bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
  bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;

  Type *Ty = Num->getType();
  Type *I32Ty = Builder.getInt32Ty();
  Type *F32Ty = Builder.getFloatTy();

  if (Ty->getScalarSizeInBits() < 32) {
    if (IsSigned) {
      Num = Builder.CreateSExt(Num, I32Ty);
      Den = Builder.CreateSExt(Den, I32Ty);
    } else {
      Num = Builder.CreateZExt(Num, I32Ty);
      Den = Builder.CreateZExt(Den, I32Ty);
    }
  }

  if (Value *Res = expandDivRem24(Builder, I, Num, Den, IsDiv, IsSigned)) {
    Res = Builder.CreateTrunc(Res, Ty);
    return Res;
  }

  ConstantInt *Zero = Builder.getInt32(0);
  ConstantInt *One = Builder.getInt32(1);
  ConstantInt *MinusOne = Builder.getInt32(~0);

  Value *Sign = nullptr;
  if (IsSigned) {
    ConstantInt *K31 = Builder.getInt32(31);
    Value *LHSign = Builder.CreateAShr(Num, K31);
    Value *RHSign = Builder.CreateAShr(Den, K31);
    // Remainder sign is the same as LHS
    Sign = IsDiv ? Builder.CreateXor(LHSign, RHSign) : LHSign;

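    // Note (added): the adds and xors below compute abs(Num) and abs(Den)
    // using the identity abs(x) = (x + (x >> 31)) ^ (x >> 31).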
    Num = Builder.CreateAdd(Num, LHSign);
    Den = Builder.CreateAdd(Den, RHSign);

    Num = Builder.CreateXor(Num, LHSign);
    Den = Builder.CreateXor(Den, RHSign);
  }

  // RCP =  URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  Value *DEN_F32 = Builder.CreateUIToFP(Den, F32Ty);
  Value *RCP_F32 = Builder.CreateFDiv(ConstantFP::get(F32Ty, 1.0), DEN_F32);
  Constant *UINT_MAX_PLUS_1 = ConstantFP::get(F32Ty, BitsToFloat(0x4f800000));
  Value *RCP_SCALE = Builder.CreateFMul(RCP_F32, UINT_MAX_PLUS_1);
  Value *RCP = Builder.CreateFPToUI(RCP_SCALE, I32Ty);

  // RCP_LO, RCP_HI = mul(RCP, Den)
  Value *RCP_LO, *RCP_HI;
  std::tie(RCP_LO, RCP_HI) = getMul64(Builder, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  Value *NEG_RCP_LO = Builder.CreateNeg(RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  Value *RCP_HI_0_CC = Builder.CreateICmpEQ(RCP_HI, Zero);
  Value *ABS_RCP_LO = Builder.CreateSelect(RCP_HI_0_CC, NEG_RCP_LO, RCP_LO);

  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  Value *E = getMulHu(Builder, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  Value *RCP_A_E = Builder.CreateAdd(RCP, E);

  // RCP_S_E = RCP - E
  Value *RCP_S_E = Builder.CreateSub(RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
  Value *Tmp0 = Builder.CreateSelect(RCP_HI_0_CC, RCP_A_E, RCP_S_E);

  // Quotient = mulhu(Tmp0, Num)
  Value *Quotient = getMulHu(Builder, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  Value *Num_S_Remainder = Builder.CreateMul(Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den);
  Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero);

  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
  Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder);
  Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC,
                                                  MinusOne, Zero);

  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero);
  Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero);

  Value *Res;
  if (IsDiv) {
    // Quotient_A_One = Quotient + 1
    Value *Quotient_A_One = Builder.CreateAdd(Quotient, One);

    // Quotient_S_One = Quotient - 1
    Value *Quotient_S_One = Builder.CreateSub(Quotient, One);

    // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
    Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One);

    // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
    Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One);
  } else {
    // Remainder_S_Den = Remainder - Den
    Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den);

    // Remainder_A_Den = Remainder + Den
    Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den);

    // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
    Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den);

    // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
    Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den);
  }

  if (IsSigned) {
    Res = Builder.CreateXor(Res, Sign);
    Res = Builder.CreateSub(Res, Sign);
  }

  Res = Builder.CreateTrunc(Res, Ty);

  return Res;
}

bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I) && promoteUniformOpToI32(I))
    return true;

  bool Changed = false;
  Instruction::BinaryOps Opc = I.getOpcode();
  Type *Ty = I.getType();
  Value *NewDiv = nullptr;
  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      Ty->getScalarSizeInBits() <= 32) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
      NewDiv = UndefValue::get(VT);

      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
        Value *NumEltN = Builder.CreateExtractElement(Num, N);
        Value *DenEltN = Builder.CreateExtractElement(Den, N);
        Value *NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
        if (!NewElt)
          NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
        NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
      }
    } else {
      NewDiv = expandDivRem32(Builder, I, Num, Den);
    }

    if (NewDiv) {
      I.replaceAllUsesWith(NewDiv);
      I.eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}

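// Illustrative example (added; value names invented): a qualifying sub-dword
// load from the constant address space such as
//   %v = load i8, i8 addrspace(4)* %p, align 4
// is rewritten to roughly
//   %c = bitcast i8 addrspace(4)* %p to i32 addrspace(4)*
//   %w = load i32, i32 addrspace(4)* %c
//   %t = trunc i32 %w to i8
// so the backend can select a scalar load instead of a vector load.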
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::bitreverse:
    return visitBitreverseIntrinsicInst(I);
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformBitreverseToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  ST = &TM.getSubtarget<GCNSubtarget>(F);
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                    false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}