//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <iterator>

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const SISubtarget *ST = nullptr;
  DivergenceAnalysis *DA = nullptr;
  Module *Mod = nullptr;
  bool HasUnsafeFPMath = false;
  AMDGPUAS AMDGPUASI;

  /// \returns \p T's base element bit width.
  unsigned getBaseElementBitWidth(const Type *T) const;

  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
  /// is returned.
  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;

  /// \returns True if binary operation \p I is a signed binary operation,
  /// false otherwise.
  bool isSigned(const BinaryOperator &I) const;

  /// \returns True if the condition of 'select' operation \p I comes from a
  /// signed 'icmp' operation, false otherwise.
  bool isSigned(const SelectInst &I) const;

  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
  /// false otherwise.
  bool needsPromotionToI32(const Type *T) const;

  /// Promotes uniform binary operation \p I to equivalent 32 bit binary
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with equivalent 32 bit binary operation, and
  /// truncating the result of 32 bit binary operation back to \p I's original
  /// type. Division operation is not promoted.
  ///
  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
  /// false otherwise.
  bool promoteUniformOpToI32(BinaryOperator &I) const;

  /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, and replacing \p I with 32 bit 'icmp' operation.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(ICmpInst &I) const;

  /// Promotes uniform 'select' operation \p I to 32 bit 'select'
  /// operation.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by sign or zero extending operands
  /// to 32 bits, replacing \p I with 32 bit 'select' operation, and truncating
  /// the result of 32 bit 'select' operation back to \p I's original type.
  ///
  /// \returns True.
  bool promoteUniformOpToI32(SelectInst &I) const;

  /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
  /// intrinsic.
  ///
  /// \details \p I's base element bit width must be greater than 1 and less
  /// than or equal to 16. Promotion is done by zero extending the operand to
  /// 32 bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
  /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
  /// shift amount is 32 minus \p I's base element bit width), and truncating
  /// the result of the shift operation back to \p I's original type.
  ///
  /// \returns True.
  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;

  /// Checks whether a scalar load can be widened to 32 bits.
  ///
  /// \details A uniform, simple load of a sub-32-bit type from constant
  /// memory may be widened to a full 32 bits and the result truncated
  /// afterwards, allowing a scalar load instead of a vector load.
  ///
  /// \returns True if the load can be widened, false otherwise.
  bool canWidenScalarExtLoad(LoadInst &I) const;

public:
  static char ID;

  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoadInst(LoadInst &I);
  bool visitICmpInst(ICmpInst &I);
  bool visitSelectInst(SelectInst &I);

  bool visitIntrinsicInst(IntrinsicInst &I);
  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return T->getIntegerBitWidth();
  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}

Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(needsPromotionToI32(T) && "T does not need promotion to i32");

  if (T->isIntegerTy())
    return B.getInt32Ty();
  return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
}

bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
         I.getOpcode() == Instruction::SDiv ||
         I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
  return isa<ICmpInst>(I.getOperand(0)) ?
      cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}

bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
    return true;

  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
    // TODO: The set of packed operations is more limited, so may want to
    // promote some anyway.
    if (ST->hasVOP3PInsts())
      return false;

    return needsPromotionToI32(VT->getElementType());
  }

  return false;
}

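// Promotion sign- or zero-extends operands from at most 16 bits, so the
// promoted i32 op has headroom the narrow type lacked. For example, two
// zero-extended i16 operands sum to at most 131070, well below 2^31, so a
// promoted add can carry both nsw and nuw. The helpers below work out which
// wrap flags remain valid for each opcode.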
// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Sub:
    return true;
  case Instruction::Mul:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Return true if the op promoted to i32 should have nuw set.
static bool promotedOpIsNUW(const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::Shl:
  case Instruction::Add:
  case Instruction::Mul:
    return true;
  case Instruction::Sub:
    return I.hasNoUnsignedWrap();
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}

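// A sketch of the rewrite performed below, for a hypothetical uniform i16 add
// (illustrative IR, not taken from a real module):
//
//   %r = add i16 %a, %b
//
// becomes
//
//   %ext0 = zext i16 %a to i32
//   %ext1 = zext i16 %b to i32
//   %res  = add nuw nsw i32 %ext0, %ext1
//   %r    = trunc i32 %res to i16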
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(I))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(I))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

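// A sketch for the 'icmp' case, for a hypothetical unsigned i16 compare
// (illustrative IR): %c = icmp ult i16 %a, %b becomes
//
//   %ext0 = zext i16 %a to i32
//   %ext1 = zext i16 %b to i32
//   %c    = icmp ult i32 %ext0, %ext1
//
// Unlike the other promotions, no truncation is needed: the result is
// already an i1.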
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *NewICmp = nullptr;

  if (I.isSigned()) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);

  I.replaceAllUsesWith(NewICmp);
  I.eraseFromParent();

  return true;
}

bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp1 = nullptr;
  Value *ExtOp2 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
  } else {
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
  }
  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

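// A sketch of the bitreverse promotion, for a hypothetical uniform i16
// operand (illustrative IR):
//
//   %r = call i16 @llvm.bitreverse.i16(i16 %a)
//
// becomes
//
//   %ext   = zext i16 %a to i32
//   %rev   = call i32 @llvm.bitreverse.i32(i32 %ext)
//   %shift = lshr i32 %rev, 16
//   %r     = trunc i32 %shift to i16
//
// The reversed bits land in the high half of the i32, so the lshr by
// 32 - 16 = 16 moves them back down before the truncation.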
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes = Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return HasDenormals;

  if (UnsafeDiv)
    return true;

  bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);

  // Reciprocal f32 is handled separately without denormals.
  return HasDenormals ^ IsOne;
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
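//
// For example, a hypothetical fdiv annotated as tolerating 2.5 ulp of error
// (illustrative IR):
//
//   %d = fdiv float %a, %b, !fpmath !0    ; !0 = !{float 2.5}
//
// becomes
//
//   %d = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)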
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() || FMF.allowReciprocal();

  // With UnsafeDiv, the node will be optimized to just rcp and mul.
  if (UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  bool HasDenormals = ST->hasFP32Denormals();
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is
    // partially constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return !!NewFDiv;
}


static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

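// A sketch of the load widening, for a hypothetical uniform i8 load, assuming
// address space 4 is the constant address space (illustrative IR):
//
//   %v = load i8, i8 addrspace(4)* %p, align 4
//
// becomes
//
//   %cast = bitcast i8 addrspace(4)* %p to i32 addrspace(4)*
//   %wide = load i32, i32 addrspace(4)* %cast, align 4
//   %v    = trunc i32 %wide to i8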
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
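    // For example (hypothetical metadata), !range !{i16 2, i16 10} on the
    // original load becomes !range !{i32 2, i32 0} on the widened load: the
    // wrapped range [2, 0) only promises the value is at least 2, saying
    // nothing about the now-unknown high bits.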
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(
            ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}


bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::bitreverse:
    return visitBitreverseIntrinsicInst(I);
  default:
    return false;
  }
}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformBitreverseToI32(I);

  return Changed;
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  ST = &TM.getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);
  // Initialize the address space mapping; visitLoadInst relies on it.
  AMDGPUASI = TM.getAMDGPUAS();

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
                    false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}