//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
17 #include "AArch64CallingConvention.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MachineValueType.h"
69 #include "llvm/Support/MathExtras.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81   class Address {
82   public:
83     using BaseKind = enum {
84       RegBase,
85       FrameIndexBase
86     };
87 
88   private:
89     BaseKind Kind = RegBase;
90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91     union {
92       unsigned Reg;
93       int FI;
94     } Base;
95     unsigned OffsetReg = 0;
96     unsigned Shift = 0;
97     int64_t Offset = 0;
98     const GlobalValue *GV = nullptr;
99 
100   public:
Address()101     Address() { Base.Reg = 0; }
102 
setKind(BaseKind K)103     void setKind(BaseKind K) { Kind = K; }
getKind() const104     BaseKind getKind() const { return Kind; }
setExtendType(AArch64_AM::ShiftExtendType E)105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
getExtendType() const106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
isRegBase() const107     bool isRegBase() const { return Kind == RegBase; }
isFIBase() const108     bool isFIBase() const { return Kind == FrameIndexBase; }
109 
setReg(unsigned Reg)110     void setReg(unsigned Reg) {
111       assert(isRegBase() && "Invalid base register access!");
112       Base.Reg = Reg;
113     }
114 
getReg() const115     unsigned getReg() const {
116       assert(isRegBase() && "Invalid base register access!");
117       return Base.Reg;
118     }
119 
setOffsetReg(unsigned Reg)120     void setOffsetReg(unsigned Reg) {
121       OffsetReg = Reg;
122     }
123 
getOffsetReg() const124     unsigned getOffsetReg() const {
125       return OffsetReg;
126     }
127 
setFI(unsigned FI)128     void setFI(unsigned FI) {
129       assert(isFIBase() && "Invalid base frame index  access!");
130       Base.FI = FI;
131     }
132 
getFI() const133     unsigned getFI() const {
134       assert(isFIBase() && "Invalid base frame index access!");
135       return Base.FI;
136     }
137 
setOffset(int64_t O)138     void setOffset(int64_t O) { Offset = O; }
getOffset()139     int64_t getOffset() { return Offset; }
setShift(unsigned S)140     void setShift(unsigned S) { Shift = S; }
getShift()141     unsigned getShift() { return Shift; }
142 
setGlobalValue(const GlobalValue * G)143     void setGlobalValue(const GlobalValue *G) { GV = G; }
getGlobalValue()144     const GlobalValue *getGlobalValue() { return GV; }
145   };
146 
147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148   /// make the right decision when generating code for different targets.
149   const AArch64Subtarget *Subtarget;
150   LLVMContext *Context;
151 
152   bool fastLowerArguments() override;
153   bool fastLowerCall(CallLoweringInfo &CLI) override;
154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157   // Selection routines.
158   bool selectAddSub(const Instruction *I);
159   bool selectLogicalOp(const Instruction *I);
160   bool selectLoad(const Instruction *I);
161   bool selectStore(const Instruction *I);
162   bool selectBranch(const Instruction *I);
163   bool selectIndirectBr(const Instruction *I);
164   bool selectCmp(const Instruction *I);
165   bool selectSelect(const Instruction *I);
166   bool selectFPExt(const Instruction *I);
167   bool selectFPTrunc(const Instruction *I);
168   bool selectFPToInt(const Instruction *I, bool Signed);
169   bool selectIntToFP(const Instruction *I, bool Signed);
170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
171   bool selectRet(const Instruction *I);
172   bool selectTrunc(const Instruction *I);
173   bool selectIntExt(const Instruction *I);
174   bool selectMul(const Instruction *I);
175   bool selectShift(const Instruction *I);
176   bool selectBitCast(const Instruction *I);
177   bool selectFRem(const Instruction *I);
178   bool selectSDiv(const Instruction *I);
179   bool selectGetElementPtr(const Instruction *I);
180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182   // Utility helper routines.
183   bool isTypeLegal(Type *Ty, MVT &VT);
184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185   bool isValueAvailable(const Value *V) const;
186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187   bool computeCallAddress(const Value *V, Address &Addr);
188   bool simplifyAddress(Address &Addr, MVT VT);
189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190                             MachineMemOperand::Flags Flags,
191                             unsigned ScaleFactor, MachineMemOperand *MMO);
192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194                           unsigned Alignment);
195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196                          const Value *Cond);
197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198   bool optimizeSelect(const SelectInst *SI);
199   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 
201   // Emit helper routines.
202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203                       const Value *RHS, bool SetFlags = false,
204                       bool WantResult = true,  bool IsZExt = false);
205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207                          bool SetFlags = false, bool WantResult = true);
208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210                          bool WantResult = true);
211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213                          AArch64_AM::ShiftExtendType ShiftType,
214                          uint64_t ShiftImm, bool SetFlags = false,
215                          bool WantResult = true);
216   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218                           AArch64_AM::ShiftExtendType ExtType,
219                           uint64_t ShiftImm, bool SetFlags = false,
220                          bool WantResult = true);
221 
222   // Emit functions.
223   bool emitCompareAndBranch(const BranchInst *BI);
224   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229                     MachineMemOperand *MMO = nullptr);
230   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231                  MachineMemOperand *MMO = nullptr);
232   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233                         MachineMemOperand *MMO = nullptr);
234   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237                    bool SetFlags = false, bool WantResult = true,
238                    bool IsZExt = false);
239   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241                    bool SetFlags = false, bool WantResult = true,
242                    bool IsZExt = false);
243   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246                        unsigned RHSReg, bool RHSIsKill,
247                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248                        bool WantResult = true);
249   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250                          const Value *RHS);
251   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252                             bool LHSIsKill, uint64_t Imm);
253   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255                             uint64_t ShiftImm);
256   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258                       unsigned Op1, bool Op1IsKill);
259   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260                         unsigned Op1, bool Op1IsKill);
261   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262                         unsigned Op1, bool Op1IsKill);
263   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264                       unsigned Op1Reg, bool Op1IsKill);
265   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266                       uint64_t Imm, bool IsZExt = true);
267   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268                       unsigned Op1Reg, bool Op1IsKill);
269   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270                       uint64_t Imm, bool IsZExt = true);
271   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272                       unsigned Op1Reg, bool Op1IsKill);
273   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274                       uint64_t Imm, bool IsZExt = false);
275 
276   unsigned materializeInt(const ConstantInt *CI, MVT VT);
277   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278   unsigned materializeGV(const GlobalValue *GV);
279 
280   // Call handling routines.
281 private:
282   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284                        unsigned &NumBytes);
285   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 
287 public:
288   // Backend specific FastISel code.
289   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290   unsigned fastMaterializeConstant(const Constant *C) override;
291   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 
AArch64FastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)293   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294                            const TargetLibraryInfo *LibInfo)
295       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296     Subtarget =
297         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298     Context = &FuncInfo.Fn->getContext();
299   }
300 
301   bool fastSelectInstruction(const Instruction *I) override;
302 
303 #include "AArch64GenFastISel.inc"
304 };
305 
306 } // end anonymous namespace
307 
308 #include "AArch64GenCallingConv.inc"
309 
310 /// Check if the sign-/zero-extend will be a noop.
isIntExtFree(const Instruction * I)311 static bool isIntExtFree(const Instruction *I) {
312   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313          "Unexpected integer extend instruction.");
314   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315          "Unexpected value type.");
316   bool IsZExt = isa<ZExtInst>(I);
317 
318   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319     if (LI->hasOneUse())
320       return true;
321 
322   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324       return true;
325 
326   return false;
327 }
328 
329 /// Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
getImplicitScaleFactor(MVT VT)331 static unsigned getImplicitScaleFactor(MVT VT) {
332   switch (VT.SimpleTy) {
333   default:
334     return 0;    // invalid
335   case MVT::i1:  // fall-through
336   case MVT::i8:
337     return 1;
338   case MVT::i16:
339     return 2;
340   case MVT::i32: // fall-through
341   case MVT::f32:
342     return 4;
343   case MVT::i64: // fall-through
344   case MVT::f64:
345     return 8;
346   }
347 }
348 
CCAssignFnForCall(CallingConv::ID CC) const349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350   if (CC == CallingConv::WebKit_JS)
351     return CC_AArch64_WebKit_JS;
352   if (CC == CallingConv::GHC)
353     return CC_AArch64_GHC;
354   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356 
fastMaterializeAlloca(const AllocaInst * AI)357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359          "Alloca should always return a pointer.");
360 
361   // Don't handle dynamic allocas.
362   if (!FuncInfo.StaticAllocaMap.count(AI))
363     return 0;
364 
365   DenseMap<const AllocaInst *, int>::iterator SI =
366       FuncInfo.StaticAllocaMap.find(AI);
367 
368   if (SI != FuncInfo.StaticAllocaMap.end()) {
369     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371             ResultReg)
372         .addFrameIndex(SI->second)
373         .addImm(0)
374         .addImm(0);
375     return ResultReg;
376   }
377 
378   return 0;
379 }
380 
materializeInt(const ConstantInt * CI,MVT VT)381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382   if (VT > MVT::i64)
383     return 0;
384 
385   if (!CI->isZero())
386     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 
388   // Create a copy from the zero register to materialize a "0" value.
389   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390                                                    : &AArch64::GPR32RegClass;
391   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392   unsigned ResultReg = createResultReg(RC);
393   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394           ResultReg).addReg(ZeroReg, getKillRegState(true));
395   return ResultReg;
396 }
397 
materializeFP(const ConstantFP * CFP,MVT VT)398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399   // Positive zero (+0.0) has to be materialized with a fmov from the zero
400   // register, because the immediate version of fmov cannot encode zero.
401   if (CFP->isNullValue())
402     return fastMaterializeFloatZero(CFP);
403 
404   if (VT != MVT::f32 && VT != MVT::f64)
405     return 0;
406 
407   const APFloat Val = CFP->getValueAPF();
408   bool Is64Bit = (VT == MVT::f64);
409   // This checks to see if we can use FMOV instructions to materialize
410   // a constant, otherwise we have to materialize via the constant pool.
411   if (TLI.isFPImmLegal(Val, VT)) {
412     int Imm =
413         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414     assert((Imm != -1) && "Cannot encode floating-point constant.");
415     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417   }
418 
419   // For the MachO large code model materialize the FP constant in code.
420   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422     const TargetRegisterClass *RC = Is64Bit ?
423         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 
425     unsigned TmpReg = createResultReg(RC);
426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 
429     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431             TII.get(TargetOpcode::COPY), ResultReg)
432         .addReg(TmpReg, getKillRegState(true));
433 
434     return ResultReg;
435   }
436 
437   // Materialize via constant pool.  MachineConstantPool wants an explicit
438   // alignment.
439   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440   if (Align == 0)
441     Align = DL.getTypeAllocSize(CFP->getType());
442 
443   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 
448   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451       .addReg(ADRPReg)
452       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453   return ResultReg;
454 }
455 
materializeGV(const GlobalValue * GV)456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457   // We can't handle thread-local variables quickly yet.
458   if (GV->isThreadLocal())
459     return 0;
460 
461   // MachO still uses GOT for large code-model accesses, but ELF requires
462   // movz/movk sequences, which FastISel doesn't handle yet.
463   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464     return 0;
465 
466   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 
468   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469   if (!DestEVT.isSimple())
470     return 0;
471 
472   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473   unsigned ResultReg;
474 
475   if (OpFlags & AArch64II::MO_GOT) {
476     // ADRP + LDRX
477     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478             ADRPReg)
479         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480 
481     ResultReg = createResultReg(&AArch64::GPR64RegClass);
482     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483             ResultReg)
484         .addReg(ADRPReg)
485         .addGlobalAddress(GV, 0,
486                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487   } else {
488     // ADRP + ADDX
489     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490             ADRPReg)
491         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492 
493     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495             ResultReg)
496         .addReg(ADRPReg)
497         .addGlobalAddress(GV, 0,
498                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499         .addImm(0);
500   }
501   return ResultReg;
502 }
503 
fastMaterializeConstant(const Constant * C)504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506 
507   // Only handle simple types.
508   if (!CEVT.isSimple())
509     return 0;
510   MVT VT = CEVT.getSimpleVT();
511 
512   if (const auto *CI = dyn_cast<ConstantInt>(C))
513     return materializeInt(CI, VT);
514   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515     return materializeFP(CFP, VT);
516   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517     return materializeGV(GV);
518 
519   return 0;
520 }
521 
fastMaterializeFloatZero(const ConstantFP * CFP)522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523   assert(CFP->isNullValue() &&
524          "Floating-point constant is not a positive zero.");
525   MVT VT;
526   if (!isTypeLegal(CFP->getType(), VT))
527     return 0;
528 
529   if (VT != MVT::f32 && VT != MVT::f64)
530     return 0;
531 
532   bool Is64Bit = (VT == MVT::f64);
533   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 }
537 
538 /// Check if the multiply is by a power-of-2 constant.
isMulPowOf2(const Value * I)539 static bool isMulPowOf2(const Value *I) {
540   if (const auto *MI = dyn_cast<MulOperator>(I)) {
541     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542       if (C->getValue().isPowerOf2())
543         return true;
544     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545       if (C->getValue().isPowerOf2())
546         return true;
547   }
548   return false;
549 }
550 
551 // Computes the address to get to an object.
computeAddress(const Value * Obj,Address & Addr,Type * Ty)552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
553 {
554   const User *U = nullptr;
555   unsigned Opcode = Instruction::UserOp1;
556   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
557     // Don't walk into other basic blocks unless the object is an alloca from
558     // another block, otherwise it may not have a virtual register assigned.
559     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
560         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
561       Opcode = I->getOpcode();
562       U = I;
563     }
564   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
565     Opcode = C->getOpcode();
566     U = C;
567   }
568 
569   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
570     if (Ty->getAddressSpace() > 255)
571       // Fast instruction selection doesn't support the special
572       // address spaces.
573       return false;
574 
575   switch (Opcode) {
576   default:
577     break;
578   case Instruction::BitCast:
579     // Look through bitcasts.
580     return computeAddress(U->getOperand(0), Addr, Ty);
581 
582   case Instruction::IntToPtr:
583     // Look past no-op inttoptrs.
584     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
585         TLI.getPointerTy(DL))
586       return computeAddress(U->getOperand(0), Addr, Ty);
587     break;
588 
589   case Instruction::PtrToInt:
590     // Look past no-op ptrtoints.
591     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
592       return computeAddress(U->getOperand(0), Addr, Ty);
593     break;
594 
595   case Instruction::GetElementPtr: {
596     Address SavedAddr = Addr;
597     uint64_t TmpOffset = Addr.getOffset();
598 
599     // Iterate through the GEP folding the constants into offsets where
600     // we can.
601     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
602          GTI != E; ++GTI) {
603       const Value *Op = GTI.getOperand();
604       if (StructType *STy = GTI.getStructTypeOrNull()) {
605         const StructLayout *SL = DL.getStructLayout(STy);
606         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
607         TmpOffset += SL->getElementOffset(Idx);
608       } else {
609         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
610         while (true) {
611           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
612             // Constant-offset addressing.
613             TmpOffset += CI->getSExtValue() * S;
614             break;
615           }
616           if (canFoldAddIntoGEP(U, Op)) {
617             // A compatible add with a constant operand. Fold the constant.
618             ConstantInt *CI =
619                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
620             TmpOffset += CI->getSExtValue() * S;
621             // Iterate on the other operand.
622             Op = cast<AddOperator>(Op)->getOperand(0);
623             continue;
624           }
625           // Unsupported
626           goto unsupported_gep;
627         }
628       }
629     }
630 
631     // Try to grab the base operand now.
632     Addr.setOffset(TmpOffset);
633     if (computeAddress(U->getOperand(0), Addr, Ty))
634       return true;
635 
636     // We failed, restore everything and try the other options.
637     Addr = SavedAddr;
638 
639   unsupported_gep:
640     break;
641   }
642   case Instruction::Alloca: {
643     const AllocaInst *AI = cast<AllocaInst>(Obj);
644     DenseMap<const AllocaInst *, int>::iterator SI =
645         FuncInfo.StaticAllocaMap.find(AI);
646     if (SI != FuncInfo.StaticAllocaMap.end()) {
647       Addr.setKind(Address::FrameIndexBase);
648       Addr.setFI(SI->second);
649       return true;
650     }
651     break;
652   }
653   case Instruction::Add: {
654     // Adds of constants are common and easy enough.
655     const Value *LHS = U->getOperand(0);
656     const Value *RHS = U->getOperand(1);
657 
658     if (isa<ConstantInt>(LHS))
659       std::swap(LHS, RHS);
660 
661     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
662       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
663       return computeAddress(LHS, Addr, Ty);
664     }
665 
666     Address Backup = Addr;
667     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
668       return true;
669     Addr = Backup;
670 
671     break;
672   }
673   case Instruction::Sub: {
674     // Subs of constants are common and easy enough.
675     const Value *LHS = U->getOperand(0);
676     const Value *RHS = U->getOperand(1);
677 
678     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
679       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
680       return computeAddress(LHS, Addr, Ty);
681     }
682     break;
683   }
684   case Instruction::Shl: {
685     if (Addr.getOffsetReg())
686       break;
687 
688     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
689     if (!CI)
690       break;
691 
692     unsigned Val = CI->getZExtValue();
693     if (Val < 1 || Val > 3)
694       break;
695 
696     uint64_t NumBytes = 0;
697     if (Ty && Ty->isSized()) {
698       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
699       NumBytes = NumBits / 8;
700       if (!isPowerOf2_64(NumBits))
701         NumBytes = 0;
702     }
703 
704     if (NumBytes != (1ULL << Val))
705       break;
706 
707     Addr.setShift(Val);
708     Addr.setExtendType(AArch64_AM::LSL);
709 
710     const Value *Src = U->getOperand(0);
711     if (const auto *I = dyn_cast<Instruction>(Src)) {
712       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
713         // Fold the zext or sext when it won't become a noop.
714         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
715           if (!isIntExtFree(ZE) &&
716               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
717             Addr.setExtendType(AArch64_AM::UXTW);
718             Src = ZE->getOperand(0);
719           }
720         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
721           if (!isIntExtFree(SE) &&
722               SE->getOperand(0)->getType()->isIntegerTy(32)) {
723             Addr.setExtendType(AArch64_AM::SXTW);
724             Src = SE->getOperand(0);
725           }
726         }
727       }
728     }
729 
730     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
731       if (AI->getOpcode() == Instruction::And) {
732         const Value *LHS = AI->getOperand(0);
733         const Value *RHS = AI->getOperand(1);
734 
735         if (const auto *C = dyn_cast<ConstantInt>(LHS))
736           if (C->getValue() == 0xffffffff)
737             std::swap(LHS, RHS);
738 
739         if (const auto *C = dyn_cast<ConstantInt>(RHS))
740           if (C->getValue() == 0xffffffff) {
741             Addr.setExtendType(AArch64_AM::UXTW);
742             unsigned Reg = getRegForValue(LHS);
743             if (!Reg)
744               return false;
745             bool RegIsKill = hasTrivialKill(LHS);
746             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
747                                              AArch64::sub_32);
748             Addr.setOffsetReg(Reg);
749             return true;
750           }
751       }
752 
753     unsigned Reg = getRegForValue(Src);
754     if (!Reg)
755       return false;
756     Addr.setOffsetReg(Reg);
757     return true;
758   }
759   case Instruction::Mul: {
760     if (Addr.getOffsetReg())
761       break;
762 
763     if (!isMulPowOf2(U))
764       break;
765 
766     const Value *LHS = U->getOperand(0);
767     const Value *RHS = U->getOperand(1);
768 
769     // Canonicalize power-of-2 value to the RHS.
770     if (const auto *C = dyn_cast<ConstantInt>(LHS))
771       if (C->getValue().isPowerOf2())
772         std::swap(LHS, RHS);
773 
774     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
775     const auto *C = cast<ConstantInt>(RHS);
776     unsigned Val = C->getValue().logBase2();
777     if (Val < 1 || Val > 3)
778       break;
779 
780     uint64_t NumBytes = 0;
781     if (Ty && Ty->isSized()) {
782       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
783       NumBytes = NumBits / 8;
784       if (!isPowerOf2_64(NumBits))
785         NumBytes = 0;
786     }
787 
788     if (NumBytes != (1ULL << Val))
789       break;
790 
791     Addr.setShift(Val);
792     Addr.setExtendType(AArch64_AM::LSL);
793 
794     const Value *Src = LHS;
795     if (const auto *I = dyn_cast<Instruction>(Src)) {
796       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
797         // Fold the zext or sext when it won't become a noop.
798         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
799           if (!isIntExtFree(ZE) &&
800               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
801             Addr.setExtendType(AArch64_AM::UXTW);
802             Src = ZE->getOperand(0);
803           }
804         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
805           if (!isIntExtFree(SE) &&
806               SE->getOperand(0)->getType()->isIntegerTy(32)) {
807             Addr.setExtendType(AArch64_AM::SXTW);
808             Src = SE->getOperand(0);
809           }
810         }
811       }
812     }
813 
814     unsigned Reg = getRegForValue(Src);
815     if (!Reg)
816       return false;
817     Addr.setOffsetReg(Reg);
818     return true;
819   }
820   case Instruction::And: {
821     if (Addr.getOffsetReg())
822       break;
823 
824     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
825       break;
826 
827     const Value *LHS = U->getOperand(0);
828     const Value *RHS = U->getOperand(1);
829 
830     if (const auto *C = dyn_cast<ConstantInt>(LHS))
831       if (C->getValue() == 0xffffffff)
832         std::swap(LHS, RHS);
833 
834     if (const auto *C = dyn_cast<ConstantInt>(RHS))
835       if (C->getValue() == 0xffffffff) {
836         Addr.setShift(0);
837         Addr.setExtendType(AArch64_AM::LSL);
838         Addr.setExtendType(AArch64_AM::UXTW);
839 
840         unsigned Reg = getRegForValue(LHS);
841         if (!Reg)
842           return false;
843         bool RegIsKill = hasTrivialKill(LHS);
844         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
845                                          AArch64::sub_32);
846         Addr.setOffsetReg(Reg);
847         return true;
848       }
849     break;
850   }
851   case Instruction::SExt:
852   case Instruction::ZExt: {
853     if (!Addr.getReg() || Addr.getOffsetReg())
854       break;
855 
856     const Value *Src = nullptr;
857     // Fold the zext or sext when it won't become a noop.
858     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
859       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
860         Addr.setExtendType(AArch64_AM::UXTW);
861         Src = ZE->getOperand(0);
862       }
863     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
864       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
865         Addr.setExtendType(AArch64_AM::SXTW);
866         Src = SE->getOperand(0);
867       }
868     }
869 
870     if (!Src)
871       break;
872 
873     Addr.setShift(0);
874     unsigned Reg = getRegForValue(Src);
875     if (!Reg)
876       return false;
877     Addr.setOffsetReg(Reg);
878     return true;
879   }
880   } // end switch
881 
882   if (Addr.isRegBase() && !Addr.getReg()) {
883     unsigned Reg = getRegForValue(Obj);
884     if (!Reg)
885       return false;
886     Addr.setReg(Reg);
887     return true;
888   }
889 
890   if (!Addr.getOffsetReg()) {
891     unsigned Reg = getRegForValue(Obj);
892     if (!Reg)
893       return false;
894     Addr.setOffsetReg(Reg);
895     return true;
896   }
897 
898   return false;
899 }
900 
computeCallAddress(const Value * V,Address & Addr)901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902   const User *U = nullptr;
903   unsigned Opcode = Instruction::UserOp1;
904   bool InMBB = true;
905 
906   if (const auto *I = dyn_cast<Instruction>(V)) {
907     Opcode = I->getOpcode();
908     U = I;
909     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911     Opcode = C->getOpcode();
912     U = C;
913   }
914 
915   switch (Opcode) {
916   default: break;
917   case Instruction::BitCast:
918     // Look past bitcasts if its operand is in the same BB.
919     if (InMBB)
920       return computeCallAddress(U->getOperand(0), Addr);
921     break;
922   case Instruction::IntToPtr:
923     // Look past no-op inttoptrs if its operand is in the same BB.
924     if (InMBB &&
925         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926             TLI.getPointerTy(DL))
927       return computeCallAddress(U->getOperand(0), Addr);
928     break;
929   case Instruction::PtrToInt:
930     // Look past no-op ptrtoints if its operand is in the same BB.
931     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932       return computeCallAddress(U->getOperand(0), Addr);
933     break;
934   }
935 
936   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937     Addr.setGlobalValue(GV);
938     return true;
939   }
940 
941   // If all else fails, try to materialize the value in a register.
942   if (!Addr.getGlobalValue()) {
943     Addr.setReg(getRegForValue(V));
944     return Addr.getReg() != 0;
945   }
946 
947   return false;
948 }
949 
isTypeLegal(Type * Ty,MVT & VT)950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951   EVT evt = TLI.getValueType(DL, Ty, true);
952 
953   // Only handle simple types.
954   if (evt == MVT::Other || !evt.isSimple())
955     return false;
956   VT = evt.getSimpleVT();
957 
958   // This is a legal type, but it's not something we handle in fast-isel.
959   if (VT == MVT::f128)
960     return false;
961 
962   // Handle all other legal types, i.e. a register that will directly hold this
963   // value.
964   return TLI.isTypeLegal(VT);
965 }
966 
967 /// Determine if the value type is supported by FastISel.
968 ///
969 /// FastISel for AArch64 can handle more value types than are legal. This adds
970 /// simple value type such as i1, i8, and i16.
isTypeSupported(Type * Ty,MVT & VT,bool IsVectorAllowed)971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972   if (Ty->isVectorTy() && !IsVectorAllowed)
973     return false;
974 
975   if (isTypeLegal(Ty, VT))
976     return true;
977 
978   // If this is a type than can be sign or zero-extended to a basic operation
979   // go ahead and accept it now.
980   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981     return true;
982 
983   return false;
984 }
985 
isValueAvailable(const Value * V) const986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
987   if (!isa<Instruction>(V))
988     return true;
989 
990   const auto *I = cast<Instruction>(V);
991   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 }
993 
simplifyAddress(Address & Addr,MVT VT)994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
995   unsigned ScaleFactor = getImplicitScaleFactor(VT);
996   if (!ScaleFactor)
997     return false;
998 
999   bool ImmediateOffsetNeedsLowering = false;
1000   bool RegisterOffsetNeedsLowering = false;
1001   int64_t Offset = Addr.getOffset();
1002   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003     ImmediateOffsetNeedsLowering = true;
1004   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005            !isUInt<12>(Offset / ScaleFactor))
1006     ImmediateOffsetNeedsLowering = true;
1007 
1008   // Cannot encode an offset register and an immediate offset in the same
1009   // instruction. Fold the immediate offset into the load/store instruction and
1010   // emit an additional add to take care of the offset register.
1011   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012     RegisterOffsetNeedsLowering = true;
1013 
1014   // Cannot encode zero register as base.
1015   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016     RegisterOffsetNeedsLowering = true;
1017 
1018   // If this is a stack pointer and the offset needs to be simplified then put
1019   // the alloca address into a register, set the base type back to register and
1020   // continue. This should almost never happen.
1021   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022   {
1023     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025             ResultReg)
1026       .addFrameIndex(Addr.getFI())
1027       .addImm(0)
1028       .addImm(0);
1029     Addr.setKind(Address::RegBase);
1030     Addr.setReg(ResultReg);
1031   }
1032 
1033   if (RegisterOffsetNeedsLowering) {
1034     unsigned ResultReg = 0;
1035     if (Addr.getReg()) {
1036       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037           Addr.getExtendType() == AArch64_AM::UXTW   )
1038         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1041                                   Addr.getShift());
1042       else
1043         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046                                   Addr.getShift());
1047     } else {
1048       if (Addr.getExtendType() == AArch64_AM::UXTW)
1049         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050                                /*Op0IsKill=*/false, Addr.getShift(),
1051                                /*IsZExt=*/true);
1052       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054                                /*Op0IsKill=*/false, Addr.getShift(),
1055                                /*IsZExt=*/false);
1056       else
1057         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058                                /*Op0IsKill=*/false, Addr.getShift());
1059     }
1060     if (!ResultReg)
1061       return false;
1062 
1063     Addr.setReg(ResultReg);
1064     Addr.setOffsetReg(0);
1065     Addr.setShift(0);
1066     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067   }
1068 
1069   // Since the offset is too large for the load/store instruction get the
1070   // reg+offset into a register.
1071   if (ImmediateOffsetNeedsLowering) {
1072     unsigned ResultReg;
1073     if (Addr.getReg())
1074       // Try to fold the immediate into the add instruction.
1075       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076     else
1077       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078 
1079     if (!ResultReg)
1080       return false;
1081     Addr.setReg(ResultReg);
1082     Addr.setOffset(0);
1083   }
1084   return true;
1085 }
1086 
addLoadStoreOperands(Address & Addr,const MachineInstrBuilder & MIB,MachineMemOperand::Flags Flags,unsigned ScaleFactor,MachineMemOperand * MMO)1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088                                            const MachineInstrBuilder &MIB,
1089                                            MachineMemOperand::Flags Flags,
1090                                            unsigned ScaleFactor,
1091                                            MachineMemOperand *MMO) {
1092   int64_t Offset = Addr.getOffset() / ScaleFactor;
1093   // Frame base works a bit differently. Handle it separately.
1094   if (Addr.isFIBase()) {
1095     int FI = Addr.getFI();
1096     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1097     // and alignment should be based on the VT.
1098     MMO = FuncInfo.MF->getMachineMemOperand(
1099         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101     // Now add the rest of the operands.
1102     MIB.addFrameIndex(FI).addImm(Offset);
1103   } else {
1104     assert(Addr.isRegBase() && "Unexpected address kind.");
1105     const MCInstrDesc &II = MIB->getDesc();
1106     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107     Addr.setReg(
1108       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109     Addr.setOffsetReg(
1110       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111     if (Addr.getOffsetReg()) {
1112       assert(Addr.getOffset() == 0 && "Unexpected offset");
1113       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114                       Addr.getExtendType() == AArch64_AM::SXTX;
1115       MIB.addReg(Addr.getReg());
1116       MIB.addReg(Addr.getOffsetReg());
1117       MIB.addImm(IsSigned);
1118       MIB.addImm(Addr.getShift() != 0);
1119     } else
1120       MIB.addReg(Addr.getReg()).addImm(Offset);
1121   }
1122 
1123   if (MMO)
1124     MIB.addMemOperand(MMO);
1125 }
1126 
/// Emit an integer add (\p UseAdd == true) or subtract of \p LHS and \p RHS,
/// trying progressively more general instruction forms: immediate, extended
/// register, shifted register (for a multiply by a power of two or a shift by
/// a constant) and finally plain register-register.
///
/// \param RetVT      Type of the operation; only i1, i8, i16, i32 and i64 are
///                   handled.
/// \param SetFlags   Forwarded to the emit helpers; selects the flag-setting
///                   opcodes there.
/// \param WantResult Forwarded to the emit helpers; when false they write to
///                   the zero register instead of a fresh result register.
/// \param IsZExt     Whether narrow operands are zero- rather than
///                   sign-extended.
/// \returns the register written by the emitted instruction, or 0 if the type
/// is unsupported, an operand could not be materialized, or no instruction
/// form was applicable.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult,  bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  // Narrow types need their operands extended; i8 and i16 additionally have an
  // extend type that can be applied within the instruction (i1 has none).
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  // Remember the original type for the extensions below and perform the
  // operation itself in at least i32 (presumably relies on the MVT enumerator
  // order placing i1/i8/i16 before i32).
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // The canonicalizations below only apply to adds, where swapping the
  // operands does not change the result.

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr   )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // First choice: immediate form. A negative constant is handled by negating
  // it and flipping between add and sub.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    // Other null constants are treated as an immediate zero.
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // A left shift by a constant smaller than 4 is folded into the extended
    // register operand as well.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Put the power-of-2 constant on the right-hand side.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // The multiply becomes a left shift by log2 of the constant.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  // Nothing could be folded: fall back to the plain register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
1280 
emitAddSub_rr(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool SetFlags,bool WantResult)1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282                                         bool LHSIsKill, unsigned RHSReg,
1283                                         bool RHSIsKill, bool SetFlags,
1284                                         bool WantResult) {
1285   assert(LHSReg && RHSReg && "Invalid register number.");
1286 
1287   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289     return 0;
1290 
1291   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292     return 0;
1293 
1294   static const unsigned OpcTable[2][2][2] = {
1295     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1296       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1297     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1299   };
1300   bool Is64Bit = RetVT == MVT::i64;
1301   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302   const TargetRegisterClass *RC =
1303       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304   unsigned ResultReg;
1305   if (WantResult)
1306     ResultReg = createResultReg(RC);
1307   else
1308     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309 
1310   const MCInstrDesc &II = TII.get(Opc);
1311   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314       .addReg(LHSReg, getKillRegState(LHSIsKill))
1315       .addReg(RHSReg, getKillRegState(RHSIsKill));
1316   return ResultReg;
1317 }
1318 
emitAddSub_ri(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm,bool SetFlags,bool WantResult)1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320                                         bool LHSIsKill, uint64_t Imm,
1321                                         bool SetFlags, bool WantResult) {
1322   assert(LHSReg && "Invalid register number.");
1323 
1324   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325     return 0;
1326 
1327   unsigned ShiftImm;
1328   if (isUInt<12>(Imm))
1329     ShiftImm = 0;
1330   else if ((Imm & 0xfff000) == Imm) {
1331     ShiftImm = 12;
1332     Imm >>= 12;
1333   } else
1334     return 0;
1335 
1336   static const unsigned OpcTable[2][2][2] = {
1337     { { AArch64::SUBWri,  AArch64::SUBXri  },
1338       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1339     { { AArch64::SUBSWri, AArch64::SUBSXri },
1340       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1341   };
1342   bool Is64Bit = RetVT == MVT::i64;
1343   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344   const TargetRegisterClass *RC;
1345   if (SetFlags)
1346     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347   else
1348     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349   unsigned ResultReg;
1350   if (WantResult)
1351     ResultReg = createResultReg(RC);
1352   else
1353     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354 
1355   const MCInstrDesc &II = TII.get(Opc);
1356   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358       .addReg(LHSReg, getKillRegState(LHSIsKill))
1359       .addImm(Imm)
1360       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361   return ResultReg;
1362 }
1363 
emitAddSub_rs(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365                                         bool LHSIsKill, unsigned RHSReg,
1366                                         bool RHSIsKill,
1367                                         AArch64_AM::ShiftExtendType ShiftType,
1368                                         uint64_t ShiftImm, bool SetFlags,
1369                                         bool WantResult) {
1370   assert(LHSReg && RHSReg && "Invalid register number.");
1371   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373 
1374   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375     return 0;
1376 
1377   // Don't deal with undefined shifts.
1378   if (ShiftImm >= RetVT.getSizeInBits())
1379     return 0;
1380 
1381   static const unsigned OpcTable[2][2][2] = {
1382     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1383       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1384     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1386   };
1387   bool Is64Bit = RetVT == MVT::i64;
1388   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389   const TargetRegisterClass *RC =
1390       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391   unsigned ResultReg;
1392   if (WantResult)
1393     ResultReg = createResultReg(RC);
1394   else
1395     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396 
1397   const MCInstrDesc &II = TII.get(Opc);
1398   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401       .addReg(LHSReg, getKillRegState(LHSIsKill))
1402       .addReg(RHSReg, getKillRegState(RHSIsKill))
1403       .addImm(getShifterImm(ShiftType, ShiftImm));
1404   return ResultReg;
1405 }
1406 
emitAddSub_rx(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ExtType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408                                         bool LHSIsKill, unsigned RHSReg,
1409                                         bool RHSIsKill,
1410                                         AArch64_AM::ShiftExtendType ExtType,
1411                                         uint64_t ShiftImm, bool SetFlags,
1412                                         bool WantResult) {
1413   assert(LHSReg && RHSReg && "Invalid register number.");
1414   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416 
1417   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418     return 0;
1419 
1420   if (ShiftImm >= 4)
1421     return 0;
1422 
1423   static const unsigned OpcTable[2][2][2] = {
1424     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1425       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1426     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1428   };
1429   bool Is64Bit = RetVT == MVT::i64;
1430   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431   const TargetRegisterClass *RC = nullptr;
1432   if (SetFlags)
1433     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434   else
1435     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436   unsigned ResultReg;
1437   if (WantResult)
1438     ResultReg = createResultReg(RC);
1439   else
1440     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441 
1442   const MCInstrDesc &II = TII.get(Opc);
1443   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446       .addReg(LHSReg, getKillRegState(LHSIsKill))
1447       .addReg(RHSReg, getKillRegState(RHSIsKill))
1448       .addImm(getArithExtendImm(ExtType, ShiftImm));
1449   return ResultReg;
1450 }
1451 
emitCmp(const Value * LHS,const Value * RHS,bool IsZExt)1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453   Type *Ty = LHS->getType();
1454   EVT EVT = TLI.getValueType(DL, Ty, true);
1455   if (!EVT.isSimple())
1456     return false;
1457   MVT VT = EVT.getSimpleVT();
1458 
1459   switch (VT.SimpleTy) {
1460   default:
1461     return false;
1462   case MVT::i1:
1463   case MVT::i8:
1464   case MVT::i16:
1465   case MVT::i32:
1466   case MVT::i64:
1467     return emitICmp(VT, LHS, RHS, IsZExt);
1468   case MVT::f32:
1469   case MVT::f64:
1470     return emitFCmp(VT, LHS, RHS);
1471   }
1472 }
1473 
emitICmp(MVT RetVT,const Value * LHS,const Value * RHS,bool IsZExt)1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475                                bool IsZExt) {
1476   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477                  IsZExt) != 0;
1478 }
1479 
emitICmp_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481                                   uint64_t Imm) {
1482   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485 
emitFCmp(MVT RetVT,const Value * LHS,const Value * RHS)1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488     return false;
1489 
1490   // Check to see if the 2nd operand is a constant that we can encode directly
1491   // in the compare.
1492   bool UseImm = false;
1493   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494     if (CFP->isZero() && !CFP->isNegative())
1495       UseImm = true;
1496 
1497   unsigned LHSReg = getRegForValue(LHS);
1498   if (!LHSReg)
1499     return false;
1500   bool LHSIsKill = hasTrivialKill(LHS);
1501 
1502   if (UseImm) {
1503     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505         .addReg(LHSReg, getKillRegState(LHSIsKill));
1506     return true;
1507   }
1508 
1509   unsigned RHSReg = getRegForValue(RHS);
1510   if (!RHSReg)
1511     return false;
1512   bool RHSIsKill = hasTrivialKill(RHS);
1513 
1514   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516       .addReg(LHSReg, getKillRegState(LHSIsKill))
1517       .addReg(RHSReg, getKillRegState(RHSIsKill));
1518   return true;
1519 }
1520 
emitAdd(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522                                   bool SetFlags, bool WantResult, bool IsZExt) {
1523   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524                     IsZExt);
1525 }
1526 
1527 /// This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
emitAdd_ri_(MVT VT,unsigned Op0,bool Op0IsKill,int64_t Imm)1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533                                       int64_t Imm) {
1534   unsigned ResultReg;
1535   if (Imm < 0)
1536     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537   else
1538     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539 
1540   if (ResultReg)
1541     return ResultReg;
1542 
1543   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544   if (!CReg)
1545     return 0;
1546 
1547   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548   return ResultReg;
1549 }
1550 
emitSub(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552                                   bool SetFlags, bool WantResult, bool IsZExt) {
1553   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554                     IsZExt);
1555 }
1556 
emitSubs_rr(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool WantResult)1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558                                       bool LHSIsKill, unsigned RHSReg,
1559                                       bool RHSIsKill, bool WantResult) {
1560   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561                        RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563 
emitSubs_rs(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool WantResult)1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565                                       bool LHSIsKill, unsigned RHSReg,
1566                                       bool RHSIsKill,
1567                                       AArch64_AM::ShiftExtendType ShiftType,
1568                                       uint64_t ShiftImm, bool WantResult) {
1569   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571                        WantResult);
1572 }
1573 
emitLogicalOp(unsigned ISDOpc,MVT RetVT,const Value * LHS,const Value * RHS)1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575                                         const Value *LHS, const Value *RHS) {
1576   // Canonicalize immediates to the RHS first.
1577   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578     std::swap(LHS, RHS);
1579 
1580   // Canonicalize mul by power-of-2 to the RHS.
1581   if (LHS->hasOneUse() && isValueAvailable(LHS))
1582     if (isMulPowOf2(LHS))
1583       std::swap(LHS, RHS);
1584 
1585   // Canonicalize shift immediate to the RHS.
1586   if (LHS->hasOneUse() && isValueAvailable(LHS))
1587     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588       if (isa<ConstantInt>(SI->getOperand(1)))
1589         std::swap(LHS, RHS);
1590 
1591   unsigned LHSReg = getRegForValue(LHS);
1592   if (!LHSReg)
1593     return 0;
1594   bool LHSIsKill = hasTrivialKill(LHS);
1595 
1596   unsigned ResultReg = 0;
1597   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598     uint64_t Imm = C->getZExtValue();
1599     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600   }
1601   if (ResultReg)
1602     return ResultReg;
1603 
1604   // Check if the mul can be folded into the instruction.
1605   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606     if (isMulPowOf2(RHS)) {
1607       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609 
1610       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611         if (C->getValue().isPowerOf2())
1612           std::swap(MulLHS, MulRHS);
1613 
1614       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616 
1617       unsigned RHSReg = getRegForValue(MulLHS);
1618       if (!RHSReg)
1619         return 0;
1620       bool RHSIsKill = hasTrivialKill(MulLHS);
1621       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622                                    RHSIsKill, ShiftVal);
1623       if (ResultReg)
1624         return ResultReg;
1625     }
1626   }
1627 
1628   // Check if the shift can be folded into the instruction.
1629   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632         uint64_t ShiftVal = C->getZExtValue();
1633         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634         if (!RHSReg)
1635           return 0;
1636         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638                                      RHSIsKill, ShiftVal);
1639         if (ResultReg)
1640           return ResultReg;
1641       }
1642   }
1643 
1644   unsigned RHSReg = getRegForValue(RHS);
1645   if (!RHSReg)
1646     return 0;
1647   bool RHSIsKill = hasTrivialKill(RHS);
1648 
1649   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654   }
1655   return ResultReg;
1656 }
1657 
emitLogicalOp_ri(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659                                            unsigned LHSReg, bool LHSIsKill,
1660                                            uint64_t Imm) {
1661   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662                 "ISD nodes are not consecutive!");
1663   static const unsigned OpcTable[3][2] = {
1664     { AArch64::ANDWri, AArch64::ANDXri },
1665     { AArch64::ORRWri, AArch64::ORRXri },
1666     { AArch64::EORWri, AArch64::EORXri }
1667   };
1668   const TargetRegisterClass *RC;
1669   unsigned Opc;
1670   unsigned RegSize;
1671   switch (RetVT.SimpleTy) {
1672   default:
1673     return 0;
1674   case MVT::i1:
1675   case MVT::i8:
1676   case MVT::i16:
1677   case MVT::i32: {
1678     unsigned Idx = ISDOpc - ISD::AND;
1679     Opc = OpcTable[Idx][0];
1680     RC = &AArch64::GPR32spRegClass;
1681     RegSize = 32;
1682     break;
1683   }
1684   case MVT::i64:
1685     Opc = OpcTable[ISDOpc - ISD::AND][1];
1686     RC = &AArch64::GPR64spRegClass;
1687     RegSize = 64;
1688     break;
1689   }
1690 
1691   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692     return 0;
1693 
1694   unsigned ResultReg =
1695       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700   }
1701   return ResultReg;
1702 }
1703 
emitLogicalOp_rs(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,uint64_t ShiftImm)1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705                                            unsigned LHSReg, bool LHSIsKill,
1706                                            unsigned RHSReg, bool RHSIsKill,
1707                                            uint64_t ShiftImm) {
1708   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709                 "ISD nodes are not consecutive!");
1710   static const unsigned OpcTable[3][2] = {
1711     { AArch64::ANDWrs, AArch64::ANDXrs },
1712     { AArch64::ORRWrs, AArch64::ORRXrs },
1713     { AArch64::EORWrs, AArch64::EORXrs }
1714   };
1715 
1716   // Don't deal with undefined shifts.
1717   if (ShiftImm >= RetVT.getSizeInBits())
1718     return 0;
1719 
1720   const TargetRegisterClass *RC;
1721   unsigned Opc;
1722   switch (RetVT.SimpleTy) {
1723   default:
1724     return 0;
1725   case MVT::i1:
1726   case MVT::i8:
1727   case MVT::i16:
1728   case MVT::i32:
1729     Opc = OpcTable[ISDOpc - ISD::AND][0];
1730     RC = &AArch64::GPR32RegClass;
1731     break;
1732   case MVT::i64:
1733     Opc = OpcTable[ISDOpc - ISD::AND][1];
1734     RC = &AArch64::GPR64RegClass;
1735     break;
1736   }
1737   unsigned ResultReg =
1738       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743   }
1744   return ResultReg;
1745 }
1746 
emitAnd_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748                                      uint64_t Imm) {
1749   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751 
/// Emit a load of a value of type \p VT from \p Addr.
///
/// \param VT       Type of the value in memory; selects the size column of the
///                 opcode tables.
/// \param RetVT    Type the result is wanted in; only whether it is i64
///                 matters here.
/// \param Addr     Address to load from; simplified via simplifyAddress first.
/// \param WantZExt Selects the zero-extending (true) or sign-extending (false)
///                 half of the integer opcode table.
/// \param MMO      Memory operand forwarded to addLoadStoreOperands.
/// \returns the register holding the loaded value, or 0 if the target does not
/// allow the access for \p VT or the address cannot be simplified.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  // An offset that is not a multiple of the scale factor also takes the
  // unscaled path.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Integer load opcodes, indexed as
  //   [WantZExt][2 * Idx + IsRet64Bit][size column]
  // where Idx selects the addressing form (see below) and the size column is
  // 0 for i1/i8, 1 for i16, 2 for i32 and 3 for i64.
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // Floating-point load opcodes, indexed as [Idx][0 for f32, 1 for f64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  // A register-offset form is used only for a register base with zero
  // immediate offset and both a base and an offset register present.
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  // Idx: 0 = unscaled immediate, 1 = scaled immediate, 2 = register offset.
  // It is bumped by one when the address carries a UXTW/SXTW extend.
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  // For i1..i32 a 64-bit destination class is used only for sign-extending
  // loads to i64; zero-extending loads stay in a 32-bit register and are
  // widened below.
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling: the loaded value is ANDed with 1.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
1888 
selectAddSub(const Instruction * I)1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890   MVT VT;
1891   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892     return false;
1893 
1894   if (VT.isVector())
1895     return selectOperator(I, I->getOpcode());
1896 
1897   unsigned ResultReg;
1898   switch (I->getOpcode()) {
1899   default:
1900     llvm_unreachable("Unexpected instruction.");
1901   case Instruction::Add:
1902     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903     break;
1904   case Instruction::Sub:
1905     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906     break;
1907   }
1908   if (!ResultReg)
1909     return false;
1910 
1911   updateValueMap(I, ResultReg);
1912   return true;
1913 }
1914 
selectLogicalOp(const Instruction * I)1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916   MVT VT;
1917   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918     return false;
1919 
1920   if (VT.isVector())
1921     return selectOperator(I, I->getOpcode());
1922 
1923   unsigned ResultReg;
1924   switch (I->getOpcode()) {
1925   default:
1926     llvm_unreachable("Unexpected instruction.");
1927   case Instruction::And:
1928     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929     break;
1930   case Instruction::Or:
1931     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932     break;
1933   case Instruction::Xor:
1934     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935     break;
1936   }
1937   if (!ResultReg)
1938     return false;
1939 
1940   updateValueMap(I, ResultReg);
1941   return true;
1942 }
1943 
/// Select a load instruction, folding a single sign-/zero-extend user into the
/// load where possible.
///
/// \returns true if the load was handled here; false for an unsupported type,
/// an atomic load, a swifterror pointer (when the target supports swifterror),
/// an address that cannot be computed, or when emitLoad produces no register.
bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further.  Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  // Atomic loads are not handled here.
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  // This is only attempted when the load has exactly one use. RetVT is reset
  // to VT when the extend's type is not supported; WantZExt is cleared for any
  // sign-extend user, supported or not.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
    // it when it selects the integer extend.
    unsigned Reg = lookUpRegForValue(IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      // The load was emitted with a 64-bit result but the value map entry for
      // the load itself must hold the narrower value.
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          // Note: this iterator is named I and shadows the instruction
          // parameter for the rest of this scope. The result register is taken
          // from operand 0 of the instruction preceding the deleted one.
          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
          ResultReg = std::prev(I)->getOperand(0).getReg();
          removeDeadCode(I, std::next(I));
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 /*IsKill=*/true,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the instructions
    // that have been emitted by the integer extend lowering code and use the
    // result from the load instruction directly.
    // Each iteration records the first register use operand of MI, erases MI,
    // and continues with the unique definition of that register (if any).
    while (MI) {
      Reg = 0;
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      // As above, this iterator shadows the instruction parameter I.
      MachineBasicBlock::iterator I(MI);
      removeDeadCode(I, std::next(I));
      MI = nullptr;
      if (Reg)
        MI = MRI.getUniqueVRegDef(Reg);
    }
    // The extend's value is now provided directly by the load result.
    updateValueMap(IntExtVal, ResultReg);
    return true;
  }

  updateValueMap(I, ResultReg);
  return true;
}
2055 
emitStoreRelease(MVT VT,unsigned SrcReg,unsigned AddrReg,MachineMemOperand * MMO)2056 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2057                                        unsigned AddrReg,
2058                                        MachineMemOperand *MMO) {
2059   unsigned Opc;
2060   switch (VT.SimpleTy) {
2061   default: return false;
2062   case MVT::i8:  Opc = AArch64::STLRB; break;
2063   case MVT::i16: Opc = AArch64::STLRH; break;
2064   case MVT::i32: Opc = AArch64::STLRW; break;
2065   case MVT::i64: Opc = AArch64::STLRX; break;
2066   }
2067 
2068   const MCInstrDesc &II = TII.get(Opc);
2069   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2070   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2071   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2072       .addReg(SrcReg)
2073       .addReg(AddrReg)
2074       .addMemOperand(MMO);
2075   return true;
2076 }
2077 
emitStore(MVT VT,unsigned SrcReg,Address Addr,MachineMemOperand * MMO)2078 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2079                                 MachineMemOperand *MMO) {
2080   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2081     return false;
2082 
2083   // Simplify this down to something we can handle.
2084   if (!simplifyAddress(Addr, VT))
2085     return false;
2086 
2087   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2088   if (!ScaleFactor)
2089     llvm_unreachable("Unexpected value type.");
2090 
2091   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2092   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2093   bool UseScaled = true;
2094   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2095     UseScaled = false;
2096     ScaleFactor = 1;
2097   }
2098 
2099   static const unsigned OpcTable[4][6] = {
2100     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2101       AArch64::STURSi,   AArch64::STURDi },
2102     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2103       AArch64::STRSui,   AArch64::STRDui },
2104     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2105       AArch64::STRSroX,  AArch64::STRDroX },
2106     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2107       AArch64::STRSroW,  AArch64::STRDroW }
2108   };
2109 
2110   unsigned Opc;
2111   bool VTIsi1 = false;
2112   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2113                       Addr.getOffsetReg();
2114   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2115   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2116       Addr.getExtendType() == AArch64_AM::SXTW)
2117     Idx++;
2118 
2119   switch (VT.SimpleTy) {
2120   default: llvm_unreachable("Unexpected value type.");
2121   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2122   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2123   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2124   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2125   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2126   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2127   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2128   }
2129 
2130   // Storing an i1 requires special handling.
2131   if (VTIsi1 && SrcReg != AArch64::WZR) {
2132     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2133     assert(ANDReg && "Unexpected AND instruction emission failure.");
2134     SrcReg = ANDReg;
2135   }
2136   // Create the base instruction, then add the operands.
2137   const MCInstrDesc &II = TII.get(Opc);
2138   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2139   MachineInstrBuilder MIB =
2140       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2141   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2142 
2143   return true;
2144 }
2145 
selectStore(const Instruction * I)2146 bool AArch64FastISel::selectStore(const Instruction *I) {
2147   MVT VT;
2148   const Value *Op0 = I->getOperand(0);
2149   // Verify we have a legal type before going any further.  Currently, we handle
2150   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2151   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2152   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2153     return false;
2154 
2155   const Value *PtrV = I->getOperand(1);
2156   if (TLI.supportSwiftError()) {
2157     // Swifterror values can come from either a function parameter with
2158     // swifterror attribute or an alloca with swifterror attribute.
2159     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2160       if (Arg->hasSwiftErrorAttr())
2161         return false;
2162     }
2163 
2164     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2165       if (Alloca->isSwiftError())
2166         return false;
2167     }
2168   }
2169 
2170   // Get the value to be stored into a register. Use the zero register directly
2171   // when possible to avoid an unnecessary copy and a wasted register.
2172   unsigned SrcReg = 0;
2173   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2174     if (CI->isZero())
2175       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2177     if (CF->isZero() && !CF->isNegative()) {
2178       VT = MVT::getIntegerVT(VT.getSizeInBits());
2179       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180     }
2181   }
2182 
2183   if (!SrcReg)
2184     SrcReg = getRegForValue(Op0);
2185 
2186   if (!SrcReg)
2187     return false;
2188 
2189   auto *SI = cast<StoreInst>(I);
2190 
2191   // Try to emit a STLR for seq_cst/release.
2192   if (SI->isAtomic()) {
2193     AtomicOrdering Ord = SI->getOrdering();
2194     // The non-atomic instructions are sufficient for relaxed stores.
2195     if (isReleaseOrStronger(Ord)) {
2196       // The STLR addressing mode only supports a base reg; pass that directly.
2197       unsigned AddrReg = getRegForValue(PtrV);
2198       return emitStoreRelease(VT, SrcReg, AddrReg,
2199                               createMachineMemOperandFor(I));
2200     }
2201   }
2202 
2203   // See if we can handle this address.
2204   Address Addr;
2205   if (!computeAddress(PtrV, Addr, Op0->getType()))
2206     return false;
2207 
2208   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2209     return false;
2210   return true;
2211 }
2212 
getCompareCC(CmpInst::Predicate Pred)2213 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2214   switch (Pred) {
2215   case CmpInst::FCMP_ONE:
2216   case CmpInst::FCMP_UEQ:
2217   default:
2218     // AL is our "false" for now. The other two need more compares.
2219     return AArch64CC::AL;
2220   case CmpInst::ICMP_EQ:
2221   case CmpInst::FCMP_OEQ:
2222     return AArch64CC::EQ;
2223   case CmpInst::ICMP_SGT:
2224   case CmpInst::FCMP_OGT:
2225     return AArch64CC::GT;
2226   case CmpInst::ICMP_SGE:
2227   case CmpInst::FCMP_OGE:
2228     return AArch64CC::GE;
2229   case CmpInst::ICMP_UGT:
2230   case CmpInst::FCMP_UGT:
2231     return AArch64CC::HI;
2232   case CmpInst::FCMP_OLT:
2233     return AArch64CC::MI;
2234   case CmpInst::ICMP_ULE:
2235   case CmpInst::FCMP_OLE:
2236     return AArch64CC::LS;
2237   case CmpInst::FCMP_ORD:
2238     return AArch64CC::VC;
2239   case CmpInst::FCMP_UNO:
2240     return AArch64CC::VS;
2241   case CmpInst::FCMP_UGE:
2242     return AArch64CC::PL;
2243   case CmpInst::ICMP_SLT:
2244   case CmpInst::FCMP_ULT:
2245     return AArch64CC::LT;
2246   case CmpInst::ICMP_SLE:
2247   case CmpInst::FCMP_ULE:
2248     return AArch64CC::LE;
2249   case CmpInst::FCMP_UNE:
2250   case CmpInst::ICMP_NE:
2251     return AArch64CC::NE;
2252   case CmpInst::ICMP_UGE:
2253     return AArch64CC::HS;
2254   case CmpInst::ICMP_ULT:
2255     return AArch64CC::LO;
2256   }
2257 }
2258 
/// Try to emit a combined compare-and-branch instruction.
///
/// Handles branches whose condition is an integer compare that can be
/// expressed as CBZ/CBNZ (compare against zero) or TBZ/TBNZ (test of a single
/// bit): equality with zero, equality of an AND-with-power-of-two with zero,
/// and sign tests written as "slt/sge 0" or "sgt/sle -1".
///
/// \returns true if the branch was emitted; false if the function has the
/// SpeculativeLoadHardening attribute, the operand type is unsupported or
/// wider than 64 bits, the compare does not match one of the patterns above,
/// or the source register cannot be obtained.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))
    return false;

  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  MVT VT;
  if (!isTypeSupported(LHS->getType(), VT))
    return false;

  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Try to take advantage of fallthrough opportunities.
  // If the true target is the layout successor, branch to the false target on
  // the inverted condition instead.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Predicate = CmpInst::getInversePredicate(Predicate);
  }

  // TestBit stays -1 for a plain compare against zero; otherwise it is the
  // index of the bit to test. IsCmpNE selects the "non-zero" branch flavour.
  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    // Put a null constant on the right-hand side, then require one there.
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
      std::swap(LHS, RHS);

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    // "(x & pow2) ==/!= 0" becomes a test of that single bit of x.
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        const Value *AndLHS = AI->getOperand(0);
        const Value *AndRHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(AndLHS, AndRHS);

        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }

    // An i1 compare always tests bit 0.
    if (VT == MVT::i1)
      TestBit = 0;

    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // "x < 0" / "x >= 0" is a test of the sign bit.
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    // "x > -1" / "x <= -1" is likewise a test of the sign bit.
    if (!isa<ConstantInt>(RHS))
      return false;

    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  } // end switch

  // Indexed as [IsBitTest][IsCmpNE][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }
  };

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  // A bit test of one of the low 32 bits uses the 32-bit opcode even for a
  // 64-bit operand.
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opc);

  unsigned SrcReg = getRegForValue(LHS);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(LHS);

  // A 64-bit value tested with a 32-bit opcode: use its sub_32 subregister.
  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                        AArch64::sub_32);

  // A plain compare of a type narrower than 32 bits is zero-extended to i32
  // first.
  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);

  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
          .addReg(SrcReg, getKillRegState(SrcIsKill));
  // Only the bit-test opcodes take the bit index operand.
  if (IsBitTest)
    MIB.addImm(TestBit);
  MIB.addMBB(TBB);

  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
2386 
selectBranch(const Instruction * I)2387 bool AArch64FastISel::selectBranch(const Instruction *I) {
2388   const BranchInst *BI = cast<BranchInst>(I);
2389   if (BI->isUnconditional()) {
2390     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2391     fastEmitBranch(MSucc, BI->getDebugLoc());
2392     return true;
2393   }
2394 
2395   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2396   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2397 
2398   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2399     if (CI->hasOneUse() && isValueAvailable(CI)) {
2400       // Try to optimize or fold the cmp.
2401       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2402       switch (Predicate) {
2403       default:
2404         break;
2405       case CmpInst::FCMP_FALSE:
2406         fastEmitBranch(FBB, DbgLoc);
2407         return true;
2408       case CmpInst::FCMP_TRUE:
2409         fastEmitBranch(TBB, DbgLoc);
2410         return true;
2411       }
2412 
2413       // Try to emit a combined compare-and-branch first.
2414       if (emitCompareAndBranch(BI))
2415         return true;
2416 
2417       // Try to take advantage of fallthrough opportunities.
2418       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2419         std::swap(TBB, FBB);
2420         Predicate = CmpInst::getInversePredicate(Predicate);
2421       }
2422 
2423       // Emit the cmp.
2424       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2425         return false;
2426 
2427       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2428       // instruction.
2429       AArch64CC::CondCode CC = getCompareCC(Predicate);
2430       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2431       switch (Predicate) {
2432       default:
2433         break;
2434       case CmpInst::FCMP_UEQ:
2435         ExtraCC = AArch64CC::EQ;
2436         CC = AArch64CC::VS;
2437         break;
2438       case CmpInst::FCMP_ONE:
2439         ExtraCC = AArch64CC::MI;
2440         CC = AArch64CC::GT;
2441         break;
2442       }
2443       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2444 
2445       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2446       if (ExtraCC != AArch64CC::AL) {
2447         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2448             .addImm(ExtraCC)
2449             .addMBB(TBB);
2450       }
2451 
2452       // Emit the branch.
2453       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2454           .addImm(CC)
2455           .addMBB(TBB);
2456 
2457       finishCondBranch(BI->getParent(), TBB, FBB);
2458       return true;
2459     }
2460   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2461     uint64_t Imm = CI->getZExtValue();
2462     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2463     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2464         .addMBB(Target);
2465 
2466     // Obtain the branch probability and add the target to the successor list.
2467     if (FuncInfo.BPI) {
2468       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2469           BI->getParent(), Target->getBasicBlock());
2470       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2471     } else
2472       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2473     return true;
2474   } else {
2475     AArch64CC::CondCode CC = AArch64CC::NE;
2476     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2477       // Fake request the condition, otherwise the intrinsic might be completely
2478       // optimized away.
2479       unsigned CondReg = getRegForValue(BI->getCondition());
2480       if (!CondReg)
2481         return false;
2482 
2483       // Emit the branch.
2484       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2485         .addImm(CC)
2486         .addMBB(TBB);
2487 
2488       finishCondBranch(BI->getParent(), TBB, FBB);
2489       return true;
2490     }
2491   }
2492 
2493   unsigned CondReg = getRegForValue(BI->getCondition());
2494   if (CondReg == 0)
2495     return false;
2496   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2497 
2498   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2499   unsigned Opcode = AArch64::TBNZW;
2500   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2501     std::swap(TBB, FBB);
2502     Opcode = AArch64::TBZW;
2503   }
2504 
2505   const MCInstrDesc &II = TII.get(Opcode);
2506   unsigned ConstrainedCondReg
2507     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2508   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2509       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2510       .addImm(0)
2511       .addMBB(TBB);
2512 
2513   finishCondBranch(BI->getParent(), TBB, FBB);
2514   return true;
2515 }
2516 
selectIndirectBr(const Instruction * I)2517 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2518   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2519   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2520   if (AddrReg == 0)
2521     return false;
2522 
2523   // Emit the indirect branch.
2524   const MCInstrDesc &II = TII.get(AArch64::BR);
2525   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2526   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2527 
2528   // Make sure the CFG is up-to-date.
2529   for (auto *Succ : BI->successors())
2530     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531 
2532   return true;
2533 }
2534 
selectCmp(const Instruction * I)2535 bool AArch64FastISel::selectCmp(const Instruction *I) {
2536   const CmpInst *CI = cast<CmpInst>(I);
2537 
2538   // Vectors of i1 are weird: bail out.
2539   if (CI->getType()->isVectorTy())
2540     return false;
2541 
2542   // Try to optimize or fold the cmp.
2543   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544   unsigned ResultReg = 0;
2545   switch (Predicate) {
2546   default:
2547     break;
2548   case CmpInst::FCMP_FALSE:
2549     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2551             TII.get(TargetOpcode::COPY), ResultReg)
2552         .addReg(AArch64::WZR, getKillRegState(true));
2553     break;
2554   case CmpInst::FCMP_TRUE:
2555     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556     break;
2557   }
2558 
2559   if (ResultReg) {
2560     updateValueMap(I, ResultReg);
2561     return true;
2562   }
2563 
2564   // Emit the cmp.
2565   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566     return false;
2567 
2568   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569 
2570   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571   // condition codes are inverted, because they are used by CSINC.
2572   static unsigned CondCodeTable[2][2] = {
2573     { AArch64CC::NE, AArch64CC::VC },
2574     { AArch64CC::PL, AArch64CC::LE }
2575   };
2576   unsigned *CondCodes = nullptr;
2577   switch (Predicate) {
2578   default:
2579     break;
2580   case CmpInst::FCMP_UEQ:
2581     CondCodes = &CondCodeTable[0][0];
2582     break;
2583   case CmpInst::FCMP_ONE:
2584     CondCodes = &CondCodeTable[1][0];
2585     break;
2586   }
2587 
2588   if (CondCodes) {
2589     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2591             TmpReg1)
2592         .addReg(AArch64::WZR, getKillRegState(true))
2593         .addReg(AArch64::WZR, getKillRegState(true))
2594         .addImm(CondCodes[0]);
2595     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2596             ResultReg)
2597         .addReg(TmpReg1, getKillRegState(true))
2598         .addReg(AArch64::WZR, getKillRegState(true))
2599         .addImm(CondCodes[1]);
2600 
2601     updateValueMap(I, ResultReg);
2602     return true;
2603   }
2604 
2605   // Now set a register based on the comparison.
2606   AArch64CC::CondCode CC = getCompareCC(Predicate);
2607   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2610           ResultReg)
2611       .addReg(AArch64::WZR, getKillRegState(true))
2612       .addReg(AArch64::WZR, getKillRegState(true))
2613       .addImm(invertedCC);
2614 
2615   updateValueMap(I, ResultReg);
2616   return true;
2617 }
2618 
2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620 /// value.
optimizeSelect(const SelectInst * SI)2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622   if (!SI->getType()->isIntegerTy(1))
2623     return false;
2624 
2625   const Value *Src1Val, *Src2Val;
2626   unsigned Opc = 0;
2627   bool NeedExtraOp = false;
2628   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629     if (CI->isOne()) {
2630       Src1Val = SI->getCondition();
2631       Src2Val = SI->getFalseValue();
2632       Opc = AArch64::ORRWrr;
2633     } else {
2634       assert(CI->isZero());
2635       Src1Val = SI->getFalseValue();
2636       Src2Val = SI->getCondition();
2637       Opc = AArch64::BICWrr;
2638     }
2639   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640     if (CI->isOne()) {
2641       Src1Val = SI->getCondition();
2642       Src2Val = SI->getTrueValue();
2643       Opc = AArch64::ORRWrr;
2644       NeedExtraOp = true;
2645     } else {
2646       assert(CI->isZero());
2647       Src1Val = SI->getCondition();
2648       Src2Val = SI->getTrueValue();
2649       Opc = AArch64::ANDWrr;
2650     }
2651   }
2652 
2653   if (!Opc)
2654     return false;
2655 
2656   unsigned Src1Reg = getRegForValue(Src1Val);
2657   if (!Src1Reg)
2658     return false;
2659   bool Src1IsKill = hasTrivialKill(Src1Val);
2660 
2661   unsigned Src2Reg = getRegForValue(Src2Val);
2662   if (!Src2Reg)
2663     return false;
2664   bool Src2IsKill = hasTrivialKill(Src2Val);
2665 
2666   if (NeedExtraOp) {
2667     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2668     Src1IsKill = true;
2669   }
2670   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2671                                        Src1IsKill, Src2Reg, Src2IsKill);
2672   updateValueMap(SI, ResultReg);
2673   return true;
2674 }
2675 
selectSelect(const Instruction * I)2676 bool AArch64FastISel::selectSelect(const Instruction *I) {
2677   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2678   MVT VT;
2679   if (!isTypeSupported(I->getType(), VT))
2680     return false;
2681 
2682   unsigned Opc;
2683   const TargetRegisterClass *RC;
2684   switch (VT.SimpleTy) {
2685   default:
2686     return false;
2687   case MVT::i1:
2688   case MVT::i8:
2689   case MVT::i16:
2690   case MVT::i32:
2691     Opc = AArch64::CSELWr;
2692     RC = &AArch64::GPR32RegClass;
2693     break;
2694   case MVT::i64:
2695     Opc = AArch64::CSELXr;
2696     RC = &AArch64::GPR64RegClass;
2697     break;
2698   case MVT::f32:
2699     Opc = AArch64::FCSELSrrr;
2700     RC = &AArch64::FPR32RegClass;
2701     break;
2702   case MVT::f64:
2703     Opc = AArch64::FCSELDrrr;
2704     RC = &AArch64::FPR64RegClass;
2705     break;
2706   }
2707 
2708   const SelectInst *SI = cast<SelectInst>(I);
2709   const Value *Cond = SI->getCondition();
2710   AArch64CC::CondCode CC = AArch64CC::NE;
2711   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2712 
2713   if (optimizeSelect(SI))
2714     return true;
2715 
2716   // Try to pickup the flags, so we don't have to emit another compare.
2717   if (foldXALUIntrinsic(CC, I, Cond)) {
2718     // Fake request the condition to force emission of the XALU intrinsic.
2719     unsigned CondReg = getRegForValue(Cond);
2720     if (!CondReg)
2721       return false;
2722   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2723              isValueAvailable(Cond)) {
2724     const auto *Cmp = cast<CmpInst>(Cond);
2725     // Try to optimize or fold the cmp.
2726     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2727     const Value *FoldSelect = nullptr;
2728     switch (Predicate) {
2729     default:
2730       break;
2731     case CmpInst::FCMP_FALSE:
2732       FoldSelect = SI->getFalseValue();
2733       break;
2734     case CmpInst::FCMP_TRUE:
2735       FoldSelect = SI->getTrueValue();
2736       break;
2737     }
2738 
2739     if (FoldSelect) {
2740       unsigned SrcReg = getRegForValue(FoldSelect);
2741       if (!SrcReg)
2742         return false;
2743       unsigned UseReg = lookUpRegForValue(SI);
2744       if (UseReg)
2745         MRI.clearKillFlags(UseReg);
2746 
2747       updateValueMap(I, SrcReg);
2748       return true;
2749     }
2750 
2751     // Emit the cmp.
2752     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2753       return false;
2754 
2755     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2756     CC = getCompareCC(Predicate);
2757     switch (Predicate) {
2758     default:
2759       break;
2760     case CmpInst::FCMP_UEQ:
2761       ExtraCC = AArch64CC::EQ;
2762       CC = AArch64CC::VS;
2763       break;
2764     case CmpInst::FCMP_ONE:
2765       ExtraCC = AArch64CC::MI;
2766       CC = AArch64CC::GT;
2767       break;
2768     }
2769     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2770   } else {
2771     unsigned CondReg = getRegForValue(Cond);
2772     if (!CondReg)
2773       return false;
2774     bool CondIsKill = hasTrivialKill(Cond);
2775 
2776     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2777     CondReg = constrainOperandRegClass(II, CondReg, 1);
2778 
2779     // Emit a TST instruction (ANDS wzr, reg, #imm).
2780     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2781             AArch64::WZR)
2782         .addReg(CondReg, getKillRegState(CondIsKill))
2783         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2784   }
2785 
2786   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2787   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2788 
2789   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2790   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2791 
2792   if (!Src1Reg || !Src2Reg)
2793     return false;
2794 
2795   if (ExtraCC != AArch64CC::AL) {
2796     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797                                Src2IsKill, ExtraCC);
2798     Src2IsKill = true;
2799   }
2800   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2801                                         Src2IsKill, CC);
2802   updateValueMap(I, ResultReg);
2803   return true;
2804 }
2805 
selectFPExt(const Instruction * I)2806 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2807   Value *V = I->getOperand(0);
2808   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2809     return false;
2810 
2811   unsigned Op = getRegForValue(V);
2812   if (Op == 0)
2813     return false;
2814 
2815   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2816   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2817           ResultReg).addReg(Op);
2818   updateValueMap(I, ResultReg);
2819   return true;
2820 }
2821 
selectFPTrunc(const Instruction * I)2822 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2823   Value *V = I->getOperand(0);
2824   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2825     return false;
2826 
2827   unsigned Op = getRegForValue(V);
2828   if (Op == 0)
2829     return false;
2830 
2831   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2832   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2833           ResultReg).addReg(Op);
2834   updateValueMap(I, ResultReg);
2835   return true;
2836 }
2837 
2838 // FPToUI and FPToSI
selectFPToInt(const Instruction * I,bool Signed)2839 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2840   MVT DestVT;
2841   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2842     return false;
2843 
2844   unsigned SrcReg = getRegForValue(I->getOperand(0));
2845   if (SrcReg == 0)
2846     return false;
2847 
2848   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2849   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2850     return false;
2851 
2852   unsigned Opc;
2853   if (SrcVT == MVT::f64) {
2854     if (Signed)
2855       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2856     else
2857       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2858   } else {
2859     if (Signed)
2860       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2861     else
2862       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2863   }
2864   unsigned ResultReg = createResultReg(
2865       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2866   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2867       .addReg(SrcReg);
2868   updateValueMap(I, ResultReg);
2869   return true;
2870 }
2871 
selectIntToFP(const Instruction * I,bool Signed)2872 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2873   MVT DestVT;
2874   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2875     return false;
2876   // Let regular ISEL handle FP16
2877   if (DestVT == MVT::f16)
2878     return false;
2879 
2880   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2881          "Unexpected value type.");
2882 
2883   unsigned SrcReg = getRegForValue(I->getOperand(0));
2884   if (!SrcReg)
2885     return false;
2886   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2887 
2888   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2889 
2890   // Handle sign-extension.
2891   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2892     SrcReg =
2893         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2894     if (!SrcReg)
2895       return false;
2896     SrcIsKill = true;
2897   }
2898 
2899   unsigned Opc;
2900   if (SrcVT == MVT::i64) {
2901     if (Signed)
2902       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2903     else
2904       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2905   } else {
2906     if (Signed)
2907       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2908     else
2909       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2910   }
2911 
2912   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2913                                       SrcIsKill);
2914   updateValueMap(I, ResultReg);
2915   return true;
2916 }
2917 
fastLowerArguments()2918 bool AArch64FastISel::fastLowerArguments() {
2919   if (!FuncInfo.CanLowerReturn)
2920     return false;
2921 
2922   const Function *F = FuncInfo.Fn;
2923   if (F->isVarArg())
2924     return false;
2925 
2926   CallingConv::ID CC = F->getCallingConv();
2927   if (CC != CallingConv::C && CC != CallingConv::Swift)
2928     return false;
2929 
2930   if (Subtarget->hasCustomCallingConv())
2931     return false;
2932 
2933   // Only handle simple cases of up to 8 GPR and FPR each.
2934   unsigned GPRCnt = 0;
2935   unsigned FPRCnt = 0;
2936   for (auto const &Arg : F->args()) {
2937     if (Arg.hasAttribute(Attribute::ByVal) ||
2938         Arg.hasAttribute(Attribute::InReg) ||
2939         Arg.hasAttribute(Attribute::StructRet) ||
2940         Arg.hasAttribute(Attribute::SwiftSelf) ||
2941         Arg.hasAttribute(Attribute::SwiftError) ||
2942         Arg.hasAttribute(Attribute::Nest))
2943       return false;
2944 
2945     Type *ArgTy = Arg.getType();
2946     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2947       return false;
2948 
2949     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2950     if (!ArgVT.isSimple())
2951       return false;
2952 
2953     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2954     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2955       return false;
2956 
2957     if (VT.isVector() &&
2958         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2959       return false;
2960 
2961     if (VT >= MVT::i1 && VT <= MVT::i64)
2962       ++GPRCnt;
2963     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2964              VT.is128BitVector())
2965       ++FPRCnt;
2966     else
2967       return false;
2968 
2969     if (GPRCnt > 8 || FPRCnt > 8)
2970       return false;
2971   }
2972 
2973   static const MCPhysReg Registers[6][8] = {
2974     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2975       AArch64::W5, AArch64::W6, AArch64::W7 },
2976     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2977       AArch64::X5, AArch64::X6, AArch64::X7 },
2978     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2979       AArch64::H5, AArch64::H6, AArch64::H7 },
2980     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2981       AArch64::S5, AArch64::S6, AArch64::S7 },
2982     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2983       AArch64::D5, AArch64::D6, AArch64::D7 },
2984     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2985       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2986   };
2987 
2988   unsigned GPRIdx = 0;
2989   unsigned FPRIdx = 0;
2990   for (auto const &Arg : F->args()) {
2991     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2992     unsigned SrcReg;
2993     const TargetRegisterClass *RC;
2994     if (VT >= MVT::i1 && VT <= MVT::i32) {
2995       SrcReg = Registers[0][GPRIdx++];
2996       RC = &AArch64::GPR32RegClass;
2997       VT = MVT::i32;
2998     } else if (VT == MVT::i64) {
2999       SrcReg = Registers[1][GPRIdx++];
3000       RC = &AArch64::GPR64RegClass;
3001     } else if (VT == MVT::f16) {
3002       SrcReg = Registers[2][FPRIdx++];
3003       RC = &AArch64::FPR16RegClass;
3004     } else if (VT ==  MVT::f32) {
3005       SrcReg = Registers[3][FPRIdx++];
3006       RC = &AArch64::FPR32RegClass;
3007     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3008       SrcReg = Registers[4][FPRIdx++];
3009       RC = &AArch64::FPR64RegClass;
3010     } else if (VT.is128BitVector()) {
3011       SrcReg = Registers[5][FPRIdx++];
3012       RC = &AArch64::FPR128RegClass;
3013     } else
3014       llvm_unreachable("Unexpected value type.");
3015 
3016     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3017     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3018     // Without this, EmitLiveInCopies may eliminate the livein if its only
3019     // use is a bitcast (which isn't turned into an instruction).
3020     unsigned ResultReg = createResultReg(RC);
3021     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3022             TII.get(TargetOpcode::COPY), ResultReg)
3023         .addReg(DstReg, getKillRegState(true));
3024     updateValueMap(&Arg, ResultReg);
3025   }
3026   return true;
3027 }
3028 
processCallArgs(CallLoweringInfo & CLI,SmallVectorImpl<MVT> & OutVTs,unsigned & NumBytes)3029 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3030                                       SmallVectorImpl<MVT> &OutVTs,
3031                                       unsigned &NumBytes) {
3032   CallingConv::ID CC = CLI.CallConv;
3033   SmallVector<CCValAssign, 16> ArgLocs;
3034   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3035   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3036 
3037   // Get a count of how many bytes are to be pushed on the stack.
3038   NumBytes = CCInfo.getNextStackOffset();
3039 
3040   // Issue CALLSEQ_START
3041   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3042   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3043     .addImm(NumBytes).addImm(0);
3044 
3045   // Process the args.
3046   for (CCValAssign &VA : ArgLocs) {
3047     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3048     MVT ArgVT = OutVTs[VA.getValNo()];
3049 
3050     unsigned ArgReg = getRegForValue(ArgVal);
3051     if (!ArgReg)
3052       return false;
3053 
3054     // Handle arg promotion: SExt, ZExt, AExt.
3055     switch (VA.getLocInfo()) {
3056     case CCValAssign::Full:
3057       break;
3058     case CCValAssign::SExt: {
3059       MVT DestVT = VA.getLocVT();
3060       MVT SrcVT = ArgVT;
3061       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3062       if (!ArgReg)
3063         return false;
3064       break;
3065     }
3066     case CCValAssign::AExt:
3067     // Intentional fall-through.
3068     case CCValAssign::ZExt: {
3069       MVT DestVT = VA.getLocVT();
3070       MVT SrcVT = ArgVT;
3071       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3072       if (!ArgReg)
3073         return false;
3074       break;
3075     }
3076     default:
3077       llvm_unreachable("Unknown arg promotion!");
3078     }
3079 
3080     // Now copy/store arg to correct locations.
3081     if (VA.isRegLoc() && !VA.needsCustom()) {
3082       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3083               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3084       CLI.OutRegs.push_back(VA.getLocReg());
3085     } else if (VA.needsCustom()) {
3086       // FIXME: Handle custom args.
3087       return false;
3088     } else {
3089       assert(VA.isMemLoc() && "Assuming store on stack.");
3090 
3091       // Don't emit stores for undef values.
3092       if (isa<UndefValue>(ArgVal))
3093         continue;
3094 
3095       // Need to store on the stack.
3096       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3097 
3098       unsigned BEAlign = 0;
3099       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3100         BEAlign = 8 - ArgSize;
3101 
3102       Address Addr;
3103       Addr.setKind(Address::RegBase);
3104       Addr.setReg(AArch64::SP);
3105       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3106 
3107       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3108       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3109           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3110           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3111 
3112       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3113         return false;
3114     }
3115   }
3116   return true;
3117 }
3118 
finishCall(CallLoweringInfo & CLI,MVT RetVT,unsigned NumBytes)3119 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3120                                  unsigned NumBytes) {
3121   CallingConv::ID CC = CLI.CallConv;
3122 
3123   // Issue CALLSEQ_END
3124   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3125   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3126     .addImm(NumBytes).addImm(0);
3127 
3128   // Now the return value.
3129   if (RetVT != MVT::isVoid) {
3130     SmallVector<CCValAssign, 16> RVLocs;
3131     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3132     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3133 
3134     // Only handle a single return value.
3135     if (RVLocs.size() != 1)
3136       return false;
3137 
3138     // Copy all of the result registers out of their specified physreg.
3139     MVT CopyVT = RVLocs[0].getValVT();
3140 
3141     // TODO: Handle big-endian results
3142     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3143       return false;
3144 
3145     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3146     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3147             TII.get(TargetOpcode::COPY), ResultReg)
3148         .addReg(RVLocs[0].getLocReg());
3149     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3150 
3151     CLI.ResultReg = ResultReg;
3152     CLI.NumResultRegs = 1;
3153   }
3154 
3155   return true;
3156 }
3157 
fastLowerCall(CallLoweringInfo & CLI)3158 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3159   CallingConv::ID CC  = CLI.CallConv;
3160   bool IsTailCall     = CLI.IsTailCall;
3161   bool IsVarArg       = CLI.IsVarArg;
3162   const Value *Callee = CLI.Callee;
3163   MCSymbol *Symbol = CLI.Symbol;
3164 
3165   if (!Callee && !Symbol)
3166     return false;
3167 
3168   // Allow SelectionDAG isel to handle tail calls.
3169   if (IsTailCall)
3170     return false;
3171 
3172   CodeModel::Model CM = TM.getCodeModel();
3173   // Only support the small-addressing and large code models.
3174   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3175     return false;
3176 
3177   // FIXME: Add large code model support for ELF.
3178   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3179     return false;
3180 
3181   // Let SDISel handle vararg functions.
3182   if (IsVarArg)
3183     return false;
3184 
3185   // FIXME: Only handle *simple* calls for now.
3186   MVT RetVT;
3187   if (CLI.RetTy->isVoidTy())
3188     RetVT = MVT::isVoid;
3189   else if (!isTypeLegal(CLI.RetTy, RetVT))
3190     return false;
3191 
3192   for (auto Flag : CLI.OutFlags)
3193     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194         Flag.isSwiftSelf() || Flag.isSwiftError())
3195       return false;
3196 
3197   // Set up the argument vectors.
3198   SmallVector<MVT, 16> OutVTs;
3199   OutVTs.reserve(CLI.OutVals.size());
3200 
3201   for (auto *Val : CLI.OutVals) {
3202     MVT VT;
3203     if (!isTypeLegal(Val->getType(), VT) &&
3204         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205       return false;
3206 
3207     // We don't handle vector parameters yet.
3208     if (VT.isVector() || VT.getSizeInBits() > 64)
3209       return false;
3210 
3211     OutVTs.push_back(VT);
3212   }
3213 
3214   Address Addr;
3215   if (Callee && !computeCallAddress(Callee, Addr))
3216     return false;
3217 
3218   // Handle the arguments now that we've gotten them.
3219   unsigned NumBytes;
3220   if (!processCallArgs(CLI, OutVTs, NumBytes))
3221     return false;
3222 
3223   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3224   if (RegInfo->isAnyArgRegReserved(*MF))
3225     RegInfo->emitReservedArgRegCallError(*MF);
3226 
3227   // Issue the call.
3228   MachineInstrBuilder MIB;
3229   if (Subtarget->useSmallAddressing()) {
3230     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3231     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3232     if (Symbol)
3233       MIB.addSym(Symbol, 0);
3234     else if (Addr.getGlobalValue())
3235       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3236     else if (Addr.getReg()) {
3237       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3238       MIB.addReg(Reg);
3239     } else
3240       return false;
3241   } else {
3242     unsigned CallReg = 0;
3243     if (Symbol) {
3244       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3245       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3246               ADRPReg)
3247           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3248 
3249       CallReg = createResultReg(&AArch64::GPR64RegClass);
3250       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3251               TII.get(AArch64::LDRXui), CallReg)
3252           .addReg(ADRPReg)
3253           .addSym(Symbol,
3254                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3255     } else if (Addr.getGlobalValue())
3256       CallReg = materializeGV(Addr.getGlobalValue());
3257     else if (Addr.getReg())
3258       CallReg = Addr.getReg();
3259 
3260     if (!CallReg)
3261       return false;
3262 
3263     const MCInstrDesc &II = TII.get(AArch64::BLR);
3264     CallReg = constrainOperandRegClass(II, CallReg, 0);
3265     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3266   }
3267 
3268   // Add implicit physical register uses to the call.
3269   for (auto Reg : CLI.OutRegs)
3270     MIB.addReg(Reg, RegState::Implicit);
3271 
3272   // Add a register mask with the call-preserved registers.
3273   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3274   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3275 
3276   CLI.Call = MIB;
3277 
3278   // Finish off the call including any return values.
3279   return finishCall(CLI, RetVT, NumBytes);
3280 }
3281 
isMemCpySmall(uint64_t Len,unsigned Alignment)3282 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3283   if (Alignment)
3284     return Len / Alignment <= 4;
3285   else
3286     return Len < 32;
3287 }
3288 
tryEmitSmallMemCpy(Address Dest,Address Src,uint64_t Len,unsigned Alignment)3289 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3290                                          uint64_t Len, unsigned Alignment) {
3291   // Make sure we don't bloat code by inlining very large memcpy's.
3292   if (!isMemCpySmall(Len, Alignment))
3293     return false;
3294 
3295   int64_t UnscaledOffset = 0;
3296   Address OrigDest = Dest;
3297   Address OrigSrc = Src;
3298 
3299   while (Len) {
3300     MVT VT;
3301     if (!Alignment || Alignment >= 8) {
3302       if (Len >= 8)
3303         VT = MVT::i64;
3304       else if (Len >= 4)
3305         VT = MVT::i32;
3306       else if (Len >= 2)
3307         VT = MVT::i16;
3308       else {
3309         VT = MVT::i8;
3310       }
3311     } else {
3312       // Bound based on alignment.
3313       if (Len >= 4 && Alignment == 4)
3314         VT = MVT::i32;
3315       else if (Len >= 2 && Alignment == 2)
3316         VT = MVT::i16;
3317       else {
3318         VT = MVT::i8;
3319       }
3320     }
3321 
3322     unsigned ResultReg = emitLoad(VT, VT, Src);
3323     if (!ResultReg)
3324       return false;
3325 
3326     if (!emitStore(VT, ResultReg, Dest))
3327       return false;
3328 
3329     int64_t Size = VT.getSizeInBits() / 8;
3330     Len -= Size;
3331     UnscaledOffset += Size;
3332 
3333     // We need to recompute the unscaled offset for each iteration.
3334     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3335     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3336   }
3337 
3338   return true;
3339 }
3340 
3341 /// Check if it is possible to fold the condition from the XALU intrinsic
3342 /// into the user. The condition code will only be updated on success.
foldXALUIntrinsic(AArch64CC::CondCode & CC,const Instruction * I,const Value * Cond)3343 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3344                                         const Instruction *I,
3345                                         const Value *Cond) {
3346   if (!isa<ExtractValueInst>(Cond))
3347     return false;
3348 
3349   const auto *EV = cast<ExtractValueInst>(Cond);
3350   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3351     return false;
3352 
3353   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3354   MVT RetVT;
3355   const Function *Callee = II->getCalledFunction();
3356   Type *RetTy =
3357   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3358   if (!isTypeLegal(RetTy, RetVT))
3359     return false;
3360 
3361   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3362     return false;
3363 
3364   const Value *LHS = II->getArgOperand(0);
3365   const Value *RHS = II->getArgOperand(1);
3366 
3367   // Canonicalize immediate to the RHS.
3368   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3369       isCommutativeIntrinsic(II))
3370     std::swap(LHS, RHS);
3371 
3372   // Simplify multiplies.
3373   Intrinsic::ID IID = II->getIntrinsicID();
3374   switch (IID) {
3375   default:
3376     break;
3377   case Intrinsic::smul_with_overflow:
3378     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3379       if (C->getValue() == 2)
3380         IID = Intrinsic::sadd_with_overflow;
3381     break;
3382   case Intrinsic::umul_with_overflow:
3383     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3384       if (C->getValue() == 2)
3385         IID = Intrinsic::uadd_with_overflow;
3386     break;
3387   }
3388 
3389   AArch64CC::CondCode TmpCC;
3390   switch (IID) {
3391   default:
3392     return false;
3393   case Intrinsic::sadd_with_overflow:
3394   case Intrinsic::ssub_with_overflow:
3395     TmpCC = AArch64CC::VS;
3396     break;
3397   case Intrinsic::uadd_with_overflow:
3398     TmpCC = AArch64CC::HS;
3399     break;
3400   case Intrinsic::usub_with_overflow:
3401     TmpCC = AArch64CC::LO;
3402     break;
3403   case Intrinsic::smul_with_overflow:
3404   case Intrinsic::umul_with_overflow:
3405     TmpCC = AArch64CC::NE;
3406     break;
3407   }
3408 
3409   // Check if both instructions are in the same basic block.
3410   if (!isValueAvailable(II))
3411     return false;
3412 
3413   // Make sure nothing is in the way
3414   BasicBlock::const_iterator Start(I);
3415   BasicBlock::const_iterator End(II);
3416   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3417     // We only expect extractvalue instructions between the intrinsic and the
3418     // instruction to be selected.
3419     if (!isa<ExtractValueInst>(Itr))
3420       return false;
3421 
3422     // Check that the extractvalue operand comes from the intrinsic.
3423     const auto *EVI = cast<ExtractValueInst>(Itr);
3424     if (EVI->getAggregateOperand() != II)
3425       return false;
3426   }
3427 
3428   CC = TmpCC;
3429   return true;
3430 }
3431 
fastLowerIntrinsicCall(const IntrinsicInst * II)3432 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3433   // FIXME: Handle more intrinsics.
3434   switch (II->getIntrinsicID()) {
3435   default: return false;
3436   case Intrinsic::frameaddress: {
3437     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438     MFI.setFrameAddressIsTaken(true);
3439 
3440     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3441     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3442     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3443     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3444             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3445     // Recursively load frame address
3446     // ldr x0, [fp]
3447     // ldr x0, [x0]
3448     // ldr x0, [x0]
3449     // ...
3450     unsigned DestReg;
3451     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3452     while (Depth--) {
3453       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3454                                 SrcReg, /*IsKill=*/true, 0);
3455       assert(DestReg && "Unexpected LDR instruction emission failure.");
3456       SrcReg = DestReg;
3457     }
3458 
3459     updateValueMap(II, SrcReg);
3460     return true;
3461   }
3462   case Intrinsic::sponentry: {
3463     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3464 
3465     // SP = FP + Fixed Object + 16
3466     int FI = MFI.CreateFixedObject(4, 0, false);
3467     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3468     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3469             TII.get(AArch64::ADDXri), ResultReg)
3470             .addFrameIndex(FI)
3471             .addImm(0)
3472             .addImm(0);
3473 
3474     updateValueMap(II, ResultReg);
3475     return true;
3476   }
3477   case Intrinsic::memcpy:
3478   case Intrinsic::memmove: {
3479     const auto *MTI = cast<MemTransferInst>(II);
3480     // Don't handle volatile.
3481     if (MTI->isVolatile())
3482       return false;
3483 
3484     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3485     // we would emit dead code because we don't currently handle memmoves.
3486     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3487     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3488       // Small memcpy's are common enough that we want to do them without a call
3489       // if possible.
3490       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3491       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3492                                     MTI->getSourceAlignment());
3493       if (isMemCpySmall(Len, Alignment)) {
3494         Address Dest, Src;
3495         if (!computeAddress(MTI->getRawDest(), Dest) ||
3496             !computeAddress(MTI->getRawSource(), Src))
3497           return false;
3498         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3499           return true;
3500       }
3501     }
3502 
3503     if (!MTI->getLength()->getType()->isIntegerTy(64))
3504       return false;
3505 
3506     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3507       // Fast instruction selection doesn't support the special
3508       // address spaces.
3509       return false;
3510 
3511     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3512     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3513   }
3514   case Intrinsic::memset: {
3515     const MemSetInst *MSI = cast<MemSetInst>(II);
3516     // Don't handle volatile.
3517     if (MSI->isVolatile())
3518       return false;
3519 
3520     if (!MSI->getLength()->getType()->isIntegerTy(64))
3521       return false;
3522 
3523     if (MSI->getDestAddressSpace() > 255)
3524       // Fast instruction selection doesn't support the special
3525       // address spaces.
3526       return false;
3527 
3528     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3529   }
3530   case Intrinsic::sin:
3531   case Intrinsic::cos:
3532   case Intrinsic::pow: {
3533     MVT RetVT;
3534     if (!isTypeLegal(II->getType(), RetVT))
3535       return false;
3536 
3537     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3538       return false;
3539 
3540     static const RTLIB::Libcall LibCallTable[3][2] = {
3541       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3542       { RTLIB::COS_F32, RTLIB::COS_F64 },
3543       { RTLIB::POW_F32, RTLIB::POW_F64 }
3544     };
3545     RTLIB::Libcall LC;
3546     bool Is64Bit = RetVT == MVT::f64;
3547     switch (II->getIntrinsicID()) {
3548     default:
3549       llvm_unreachable("Unexpected intrinsic.");
3550     case Intrinsic::sin:
3551       LC = LibCallTable[0][Is64Bit];
3552       break;
3553     case Intrinsic::cos:
3554       LC = LibCallTable[1][Is64Bit];
3555       break;
3556     case Intrinsic::pow:
3557       LC = LibCallTable[2][Is64Bit];
3558       break;
3559     }
3560 
3561     ArgListTy Args;
3562     Args.reserve(II->getNumArgOperands());
3563 
3564     // Populate the argument list.
3565     for (auto &Arg : II->arg_operands()) {
3566       ArgListEntry Entry;
3567       Entry.Val = Arg;
3568       Entry.Ty = Arg->getType();
3569       Args.push_back(Entry);
3570     }
3571 
3572     CallLoweringInfo CLI;
3573     MCContext &Ctx = MF->getContext();
3574     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3575                   TLI.getLibcallName(LC), std::move(Args));
3576     if (!lowerCallTo(CLI))
3577       return false;
3578     updateValueMap(II, CLI.ResultReg);
3579     return true;
3580   }
3581   case Intrinsic::fabs: {
3582     MVT VT;
3583     if (!isTypeLegal(II->getType(), VT))
3584       return false;
3585 
3586     unsigned Opc;
3587     switch (VT.SimpleTy) {
3588     default:
3589       return false;
3590     case MVT::f32:
3591       Opc = AArch64::FABSSr;
3592       break;
3593     case MVT::f64:
3594       Opc = AArch64::FABSDr;
3595       break;
3596     }
3597     unsigned SrcReg = getRegForValue(II->getOperand(0));
3598     if (!SrcReg)
3599       return false;
3600     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3601     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3602     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3603       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3604     updateValueMap(II, ResultReg);
3605     return true;
3606   }
3607   case Intrinsic::trap:
3608     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3609         .addImm(1);
3610     return true;
3611 
3612   case Intrinsic::sqrt: {
3613     Type *RetTy = II->getCalledFunction()->getReturnType();
3614 
3615     MVT VT;
3616     if (!isTypeLegal(RetTy, VT))
3617       return false;
3618 
3619     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3620     if (!Op0Reg)
3621       return false;
3622     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3623 
3624     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3625     if (!ResultReg)
3626       return false;
3627 
3628     updateValueMap(II, ResultReg);
3629     return true;
3630   }
3631   case Intrinsic::sadd_with_overflow:
3632   case Intrinsic::uadd_with_overflow:
3633   case Intrinsic::ssub_with_overflow:
3634   case Intrinsic::usub_with_overflow:
3635   case Intrinsic::smul_with_overflow:
3636   case Intrinsic::umul_with_overflow: {
3637     // This implements the basic lowering of the xalu with overflow intrinsics.
3638     const Function *Callee = II->getCalledFunction();
3639     auto *Ty = cast<StructType>(Callee->getReturnType());
3640     Type *RetTy = Ty->getTypeAtIndex(0U);
3641 
3642     MVT VT;
3643     if (!isTypeLegal(RetTy, VT))
3644       return false;
3645 
3646     if (VT != MVT::i32 && VT != MVT::i64)
3647       return false;
3648 
3649     const Value *LHS = II->getArgOperand(0);
3650     const Value *RHS = II->getArgOperand(1);
3651     // Canonicalize immediate to the RHS.
3652     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3653         isCommutativeIntrinsic(II))
3654       std::swap(LHS, RHS);
3655 
3656     // Simplify multiplies.
3657     Intrinsic::ID IID = II->getIntrinsicID();
3658     switch (IID) {
3659     default:
3660       break;
3661     case Intrinsic::smul_with_overflow:
3662       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3663         if (C->getValue() == 2) {
3664           IID = Intrinsic::sadd_with_overflow;
3665           RHS = LHS;
3666         }
3667       break;
3668     case Intrinsic::umul_with_overflow:
3669       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3670         if (C->getValue() == 2) {
3671           IID = Intrinsic::uadd_with_overflow;
3672           RHS = LHS;
3673         }
3674       break;
3675     }
3676 
3677     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3678     AArch64CC::CondCode CC = AArch64CC::Invalid;
3679     switch (IID) {
3680     default: llvm_unreachable("Unexpected intrinsic!");
3681     case Intrinsic::sadd_with_overflow:
3682       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3683       CC = AArch64CC::VS;
3684       break;
3685     case Intrinsic::uadd_with_overflow:
3686       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687       CC = AArch64CC::HS;
3688       break;
3689     case Intrinsic::ssub_with_overflow:
3690       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3691       CC = AArch64CC::VS;
3692       break;
3693     case Intrinsic::usub_with_overflow:
3694       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695       CC = AArch64CC::LO;
3696       break;
3697     case Intrinsic::smul_with_overflow: {
3698       CC = AArch64CC::NE;
3699       unsigned LHSReg = getRegForValue(LHS);
3700       if (!LHSReg)
3701         return false;
3702       bool LHSIsKill = hasTrivialKill(LHS);
3703 
3704       unsigned RHSReg = getRegForValue(RHS);
3705       if (!RHSReg)
3706         return false;
3707       bool RHSIsKill = hasTrivialKill(RHS);
3708 
3709       if (VT == MVT::i32) {
3710         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3711         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3712                                        /*IsKill=*/false, 32);
3713         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3714                                             AArch64::sub_32);
3715         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3716                                               AArch64::sub_32);
3717         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3718                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3719       } else {
3720         assert(VT == MVT::i64 && "Unexpected value type.");
3721         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3722         // reused in the next instruction.
3723         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3724                             /*IsKill=*/false);
3725         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3726                                         RHSReg, RHSIsKill);
3727         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3728                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3729       }
3730       break;
3731     }
3732     case Intrinsic::umul_with_overflow: {
3733       CC = AArch64CC::NE;
3734       unsigned LHSReg = getRegForValue(LHS);
3735       if (!LHSReg)
3736         return false;
3737       bool LHSIsKill = hasTrivialKill(LHS);
3738 
3739       unsigned RHSReg = getRegForValue(RHS);
3740       if (!RHSReg)
3741         return false;
3742       bool RHSIsKill = hasTrivialKill(RHS);
3743 
3744       if (VT == MVT::i32) {
3745         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3746         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3747                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3748                     /*WantResult=*/false);
3749         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3750                                             AArch64::sub_32);
3751       } else {
3752         assert(VT == MVT::i64 && "Unexpected value type.");
3753         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754         // reused in the next instruction.
3755         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3756                             /*IsKill=*/false);
3757         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3758                                         RHSReg, RHSIsKill);
3759         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3760                     /*IsKill=*/false, /*WantResult=*/false);
3761       }
3762       break;
3763     }
3764     }
3765 
3766     if (MulReg) {
3767       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3768       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3769               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3770     }
3771 
3772     if (!ResultReg1)
3773       return false;
3774 
3775     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3776                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3777                                   /*IsKill=*/true, getInvertedCondCode(CC));
3778     (void)ResultReg2;
3779     assert((ResultReg1 + 1) == ResultReg2 &&
3780            "Nonconsecutive result registers.");
3781     updateValueMap(II, ResultReg1, 2);
3782     return true;
3783   }
3784   }
3785   return false;
3786 }
3787 
selectRet(const Instruction * I)3788 bool AArch64FastISel::selectRet(const Instruction *I) {
3789   const ReturnInst *Ret = cast<ReturnInst>(I);
3790   const Function &F = *I->getParent()->getParent();
3791 
3792   if (!FuncInfo.CanLowerReturn)
3793     return false;
3794 
3795   if (F.isVarArg())
3796     return false;
3797 
3798   if (TLI.supportSwiftError() &&
3799       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3800     return false;
3801 
3802   if (TLI.supportSplitCSR(FuncInfo.MF))
3803     return false;
3804 
3805   // Build a list of return value registers.
3806   SmallVector<unsigned, 4> RetRegs;
3807 
3808   if (Ret->getNumOperands() > 0) {
3809     CallingConv::ID CC = F.getCallingConv();
3810     SmallVector<ISD::OutputArg, 4> Outs;
3811     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3812 
3813     // Analyze operands of the call, assigning locations to each operand.
3814     SmallVector<CCValAssign, 16> ValLocs;
3815     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3816     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3817                                                      : RetCC_AArch64_AAPCS;
3818     CCInfo.AnalyzeReturn(Outs, RetCC);
3819 
3820     // Only handle a single return value for now.
3821     if (ValLocs.size() != 1)
3822       return false;
3823 
3824     CCValAssign &VA = ValLocs[0];
3825     const Value *RV = Ret->getOperand(0);
3826 
3827     // Don't bother handling odd stuff for now.
3828     if ((VA.getLocInfo() != CCValAssign::Full) &&
3829         (VA.getLocInfo() != CCValAssign::BCvt))
3830       return false;
3831 
3832     // Only handle register returns for now.
3833     if (!VA.isRegLoc())
3834       return false;
3835 
3836     unsigned Reg = getRegForValue(RV);
3837     if (Reg == 0)
3838       return false;
3839 
3840     unsigned SrcReg = Reg + VA.getValNo();
3841     unsigned DestReg = VA.getLocReg();
3842     // Avoid a cross-class copy. This is very unlikely.
3843     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3844       return false;
3845 
3846     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3847     if (!RVEVT.isSimple())
3848       return false;
3849 
3850     // Vectors (of > 1 lane) in big endian need tricky handling.
3851     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3852         !Subtarget->isLittleEndian())
3853       return false;
3854 
3855     MVT RVVT = RVEVT.getSimpleVT();
3856     if (RVVT == MVT::f128)
3857       return false;
3858 
3859     MVT DestVT = VA.getValVT();
3860     // Special handling for extended integers.
3861     if (RVVT != DestVT) {
3862       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3863         return false;
3864 
3865       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3866         return false;
3867 
3868       bool IsZExt = Outs[0].Flags.isZExt();
3869       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3870       if (SrcReg == 0)
3871         return false;
3872     }
3873 
3874     // Make the copy.
3875     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3876             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3877 
3878     // Add register to return instruction.
3879     RetRegs.push_back(VA.getLocReg());
3880   }
3881 
3882   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3883                                     TII.get(AArch64::RET_ReallyLR));
3884   for (unsigned RetReg : RetRegs)
3885     MIB.addReg(RetReg, RegState::Implicit);
3886   return true;
3887 }
3888 
selectTrunc(const Instruction * I)3889 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3890   Type *DestTy = I->getType();
3891   Value *Op = I->getOperand(0);
3892   Type *SrcTy = Op->getType();
3893 
3894   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3895   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3896   if (!SrcEVT.isSimple())
3897     return false;
3898   if (!DestEVT.isSimple())
3899     return false;
3900 
3901   MVT SrcVT = SrcEVT.getSimpleVT();
3902   MVT DestVT = DestEVT.getSimpleVT();
3903 
3904   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3905       SrcVT != MVT::i8)
3906     return false;
3907   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3908       DestVT != MVT::i1)
3909     return false;
3910 
3911   unsigned SrcReg = getRegForValue(Op);
3912   if (!SrcReg)
3913     return false;
3914   bool SrcIsKill = hasTrivialKill(Op);
3915 
3916   // If we're truncating from i64 to a smaller non-legal type then generate an
3917   // AND. Otherwise, we know the high bits are undefined and a truncate only
3918   // generate a COPY. We cannot mark the source register also as result
3919   // register, because this can incorrectly transfer the kill flag onto the
3920   // source register.
3921   unsigned ResultReg;
3922   if (SrcVT == MVT::i64) {
3923     uint64_t Mask = 0;
3924     switch (DestVT.SimpleTy) {
3925     default:
3926       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3927       return false;
3928     case MVT::i1:
3929       Mask = 0x1;
3930       break;
3931     case MVT::i8:
3932       Mask = 0xff;
3933       break;
3934     case MVT::i16:
3935       Mask = 0xffff;
3936       break;
3937     }
3938     // Issue an extract_subreg to get the lower 32-bits.
3939     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3940                                                 AArch64::sub_32);
3941     // Create the AND instruction which performs the actual truncation.
3942     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3943     assert(ResultReg && "Unexpected AND instruction emission failure.");
3944   } else {
3945     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3946     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3947             TII.get(TargetOpcode::COPY), ResultReg)
3948         .addReg(SrcReg, getKillRegState(SrcIsKill));
3949   }
3950 
3951   updateValueMap(I, ResultReg);
3952   return true;
3953 }
3954 
emiti1Ext(unsigned SrcReg,MVT DestVT,bool IsZExt)3955 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3956   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3957           DestVT == MVT::i64) &&
3958          "Unexpected value type.");
3959   // Handle i8 and i16 as i32.
3960   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3961     DestVT = MVT::i32;
3962 
3963   if (IsZExt) {
3964     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3965     assert(ResultReg && "Unexpected AND instruction emission failure.");
3966     if (DestVT == MVT::i64) {
3967       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3968       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3969       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3970       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3971               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3972           .addImm(0)
3973           .addReg(ResultReg)
3974           .addImm(AArch64::sub_32);
3975       ResultReg = Reg64;
3976     }
3977     return ResultReg;
3978   } else {
3979     if (DestVT == MVT::i64) {
3980       // FIXME: We're SExt i1 to i64.
3981       return 0;
3982     }
3983     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3984                             /*TODO:IsKill=*/false, 0, 0);
3985   }
3986 }
3987 
emitMul_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)3988 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3989                                       unsigned Op1, bool Op1IsKill) {
3990   unsigned Opc, ZReg;
3991   switch (RetVT.SimpleTy) {
3992   default: return 0;
3993   case MVT::i8:
3994   case MVT::i16:
3995   case MVT::i32:
3996     RetVT = MVT::i32;
3997     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3998   case MVT::i64:
3999     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4000   }
4001 
4002   const TargetRegisterClass *RC =
4003       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4004   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4005                           /*IsKill=*/ZReg, true);
4006 }
4007 
emitSMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)4008 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4009                                         unsigned Op1, bool Op1IsKill) {
4010   if (RetVT != MVT::i64)
4011     return 0;
4012 
4013   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4014                           Op0, Op0IsKill, Op1, Op1IsKill,
4015                           AArch64::XZR, /*IsKill=*/true);
4016 }
4017 
emitUMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)4018 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4019                                         unsigned Op1, bool Op1IsKill) {
4020   if (RetVT != MVT::i64)
4021     return 0;
4022 
4023   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4024                           Op0, Op0IsKill, Op1, Op1IsKill,
4025                           AArch64::XZR, /*IsKill=*/true);
4026 }
4027 
emitLSL_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4028 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4029                                      unsigned Op1Reg, bool Op1IsKill) {
4030   unsigned Opc = 0;
4031   bool NeedTrunc = false;
4032   uint64_t Mask = 0;
4033   switch (RetVT.SimpleTy) {
4034   default: return 0;
4035   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4036   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4037   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4038   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4039   }
4040 
4041   const TargetRegisterClass *RC =
4042       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4043   if (NeedTrunc) {
4044     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4045     Op1IsKill = true;
4046   }
4047   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4048                                        Op1IsKill);
4049   if (NeedTrunc)
4050     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4051   return ResultReg;
4052 }
4053 
emitLSL_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4054 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4055                                      bool Op0IsKill, uint64_t Shift,
4056                                      bool IsZExt) {
4057   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058          "Unexpected source/return type pair.");
4059   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061          "Unexpected source value type.");
4062   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063           RetVT == MVT::i64) && "Unexpected return value type.");
4064 
4065   bool Is64Bit = (RetVT == MVT::i64);
4066   unsigned RegSize = Is64Bit ? 64 : 32;
4067   unsigned DstBits = RetVT.getSizeInBits();
4068   unsigned SrcBits = SrcVT.getSizeInBits();
4069   const TargetRegisterClass *RC =
4070       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071 
4072   // Just emit a copy for "zero" shifts.
4073   if (Shift == 0) {
4074     if (RetVT == SrcVT) {
4075       unsigned ResultReg = createResultReg(RC);
4076       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4077               TII.get(TargetOpcode::COPY), ResultReg)
4078           .addReg(Op0, getKillRegState(Op0IsKill));
4079       return ResultReg;
4080     } else
4081       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082   }
4083 
4084   // Don't deal with undefined shifts.
4085   if (Shift >= DstBits)
4086     return 0;
4087 
4088   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089   // {S|U}BFM Wd, Wn, #r, #s
4090   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091 
4092   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093   // %2 = shl i16 %1, 4
4094   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098 
4099   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100   // %2 = shl i16 %1, 8
4101   // Wd<32+7-24,32-24> = Wn<7:0>
4102   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105 
4106   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107   // %2 = shl i16 %1, 12
4108   // Wd<32+3-20,32-20> = Wn<3:0>
4109   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112 
4113   unsigned ImmR = RegSize - Shift;
4114   // Limit the width to the length of the source type.
4115   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116   static const unsigned OpcTable[2][2] = {
4117     {AArch64::SBFMWri, AArch64::SBFMXri},
4118     {AArch64::UBFMWri, AArch64::UBFMXri}
4119   };
4120   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4121   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122     unsigned TmpReg = MRI.createVirtualRegister(RC);
4123     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4124             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125         .addImm(0)
4126         .addReg(Op0, getKillRegState(Op0IsKill))
4127         .addImm(AArch64::sub_32);
4128     Op0 = TmpReg;
4129     Op0IsKill = true;
4130   }
4131   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4132 }
4133 
emitLSR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4134 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4135                                      unsigned Op1Reg, bool Op1IsKill) {
4136   unsigned Opc = 0;
4137   bool NeedTrunc = false;
4138   uint64_t Mask = 0;
4139   switch (RetVT.SimpleTy) {
4140   default: return 0;
4141   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4142   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4143   case MVT::i32: Opc = AArch64::LSRVWr; break;
4144   case MVT::i64: Opc = AArch64::LSRVXr; break;
4145   }
4146 
4147   const TargetRegisterClass *RC =
4148       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4149   if (NeedTrunc) {
4150     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4151     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4152     Op0IsKill = Op1IsKill = true;
4153   }
4154   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4155                                        Op1IsKill);
4156   if (NeedTrunc)
4157     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4158   return ResultReg;
4159 }
4160 
emitLSR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4161 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4162                                      bool Op0IsKill, uint64_t Shift,
4163                                      bool IsZExt) {
4164   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4165          "Unexpected source/return type pair.");
4166   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4167           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4168          "Unexpected source value type.");
4169   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4170           RetVT == MVT::i64) && "Unexpected return value type.");
4171 
4172   bool Is64Bit = (RetVT == MVT::i64);
4173   unsigned RegSize = Is64Bit ? 64 : 32;
4174   unsigned DstBits = RetVT.getSizeInBits();
4175   unsigned SrcBits = SrcVT.getSizeInBits();
4176   const TargetRegisterClass *RC =
4177       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4178 
4179   // Just emit a copy for "zero" shifts.
4180   if (Shift == 0) {
4181     if (RetVT == SrcVT) {
4182       unsigned ResultReg = createResultReg(RC);
4183       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4184               TII.get(TargetOpcode::COPY), ResultReg)
4185       .addReg(Op0, getKillRegState(Op0IsKill));
4186       return ResultReg;
4187     } else
4188       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4189   }
4190 
4191   // Don't deal with undefined shifts.
4192   if (Shift >= DstBits)
4193     return 0;
4194 
4195   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4196   // {S|U}BFM Wd, Wn, #r, #s
4197   // Wd<s-r:0> = Wn<s:r> when r <= s
4198 
4199   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4200   // %2 = lshr i16 %1, 4
4201   // Wd<7-4:0> = Wn<7:4>
4202   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4203   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4204   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4205 
4206   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4207   // %2 = lshr i16 %1, 8
4208   // Wd<7-7,0> = Wn<7:7>
4209   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4210   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4211   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4212 
4213   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4214   // %2 = lshr i16 %1, 12
4215   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4216   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4217   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4218   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4219 
4220   if (Shift >= SrcBits && IsZExt)
4221     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4222 
4223   // It is not possible to fold a sign-extend into the LShr instruction. In this
4224   // case emit a sign-extend.
4225   if (!IsZExt) {
4226     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4227     if (!Op0)
4228       return 0;
4229     Op0IsKill = true;
4230     SrcVT = RetVT;
4231     SrcBits = SrcVT.getSizeInBits();
4232     IsZExt = true;
4233   }
4234 
4235   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4236   unsigned ImmS = SrcBits - 1;
4237   static const unsigned OpcTable[2][2] = {
4238     {AArch64::SBFMWri, AArch64::SBFMXri},
4239     {AArch64::UBFMWri, AArch64::UBFMXri}
4240   };
4241   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4242   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4243     unsigned TmpReg = MRI.createVirtualRegister(RC);
4244     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4245             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4246         .addImm(0)
4247         .addReg(Op0, getKillRegState(Op0IsKill))
4248         .addImm(AArch64::sub_32);
4249     Op0 = TmpReg;
4250     Op0IsKill = true;
4251   }
4252   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4253 }
4254 
emitASR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4255 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4256                                      unsigned Op1Reg, bool Op1IsKill) {
4257   unsigned Opc = 0;
4258   bool NeedTrunc = false;
4259   uint64_t Mask = 0;
4260   switch (RetVT.SimpleTy) {
4261   default: return 0;
4262   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4263   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4264   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4265   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4266   }
4267 
4268   const TargetRegisterClass *RC =
4269       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4270   if (NeedTrunc) {
4271     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4272     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4273     Op0IsKill = Op1IsKill = true;
4274   }
4275   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4276                                        Op1IsKill);
4277   if (NeedTrunc)
4278     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4279   return ResultReg;
4280 }
4281 
emitASR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4282 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4283                                      bool Op0IsKill, uint64_t Shift,
4284                                      bool IsZExt) {
4285   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4286          "Unexpected source/return type pair.");
4287   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4288           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4289          "Unexpected source value type.");
4290   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4291           RetVT == MVT::i64) && "Unexpected return value type.");
4292 
4293   bool Is64Bit = (RetVT == MVT::i64);
4294   unsigned RegSize = Is64Bit ? 64 : 32;
4295   unsigned DstBits = RetVT.getSizeInBits();
4296   unsigned SrcBits = SrcVT.getSizeInBits();
4297   const TargetRegisterClass *RC =
4298       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4299 
4300   // Just emit a copy for "zero" shifts.
4301   if (Shift == 0) {
4302     if (RetVT == SrcVT) {
4303       unsigned ResultReg = createResultReg(RC);
4304       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4305               TII.get(TargetOpcode::COPY), ResultReg)
4306       .addReg(Op0, getKillRegState(Op0IsKill));
4307       return ResultReg;
4308     } else
4309       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4310   }
4311 
4312   // Don't deal with undefined shifts.
4313   if (Shift >= DstBits)
4314     return 0;
4315 
4316   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4317   // {S|U}BFM Wd, Wn, #r, #s
4318   // Wd<s-r:0> = Wn<s:r> when r <= s
4319 
4320   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4321   // %2 = ashr i16 %1, 4
4322   // Wd<7-4:0> = Wn<7:4>
4323   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4324   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4325   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4326 
4327   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4328   // %2 = ashr i16 %1, 8
4329   // Wd<7-7,0> = Wn<7:7>
4330   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4331   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4332   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4333 
4334   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4335   // %2 = ashr i16 %1, 12
4336   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4337   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4338   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4339   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4340 
4341   if (Shift >= SrcBits && IsZExt)
4342     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4343 
4344   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4345   unsigned ImmS = SrcBits - 1;
4346   static const unsigned OpcTable[2][2] = {
4347     {AArch64::SBFMWri, AArch64::SBFMXri},
4348     {AArch64::UBFMWri, AArch64::UBFMXri}
4349   };
4350   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4351   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4352     unsigned TmpReg = MRI.createVirtualRegister(RC);
4353     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4354             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4355         .addImm(0)
4356         .addReg(Op0, getKillRegState(Op0IsKill))
4357         .addImm(AArch64::sub_32);
4358     Op0 = TmpReg;
4359     Op0IsKill = true;
4360   }
4361   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4362 }
4363 
emitIntExt(MVT SrcVT,unsigned SrcReg,MVT DestVT,bool IsZExt)4364 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4365                                      bool IsZExt) {
4366   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4367 
4368   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4369   // DestVT are odd things, so test to make sure that they are both types we can
4370   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4371   // bail out to SelectionDAG.
4372   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4373        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4374       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4375        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4376     return 0;
4377 
4378   unsigned Opc;
4379   unsigned Imm = 0;
4380 
4381   switch (SrcVT.SimpleTy) {
4382   default:
4383     return 0;
4384   case MVT::i1:
4385     return emiti1Ext(SrcReg, DestVT, IsZExt);
4386   case MVT::i8:
4387     if (DestVT == MVT::i64)
4388       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4389     else
4390       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4391     Imm = 7;
4392     break;
4393   case MVT::i16:
4394     if (DestVT == MVT::i64)
4395       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4396     else
4397       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4398     Imm = 15;
4399     break;
4400   case MVT::i32:
4401     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4402     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4403     Imm = 31;
4404     break;
4405   }
4406 
4407   // Handle i8 and i16 as i32.
4408   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4409     DestVT = MVT::i32;
4410   else if (DestVT == MVT::i64) {
4411     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4412     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4413             TII.get(AArch64::SUBREG_TO_REG), Src64)
4414         .addImm(0)
4415         .addReg(SrcReg)
4416         .addImm(AArch64::sub_32);
4417     SrcReg = Src64;
4418   }
4419 
4420   const TargetRegisterClass *RC =
4421       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4422   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4423 }
4424 
isZExtLoad(const MachineInstr * LI)4425 static bool isZExtLoad(const MachineInstr *LI) {
4426   switch (LI->getOpcode()) {
4427   default:
4428     return false;
4429   case AArch64::LDURBBi:
4430   case AArch64::LDURHHi:
4431   case AArch64::LDURWi:
4432   case AArch64::LDRBBui:
4433   case AArch64::LDRHHui:
4434   case AArch64::LDRWui:
4435   case AArch64::LDRBBroX:
4436   case AArch64::LDRHHroX:
4437   case AArch64::LDRWroX:
4438   case AArch64::LDRBBroW:
4439   case AArch64::LDRHHroW:
4440   case AArch64::LDRWroW:
4441     return true;
4442   }
4443 }
4444 
isSExtLoad(const MachineInstr * LI)4445 static bool isSExtLoad(const MachineInstr *LI) {
4446   switch (LI->getOpcode()) {
4447   default:
4448     return false;
4449   case AArch64::LDURSBWi:
4450   case AArch64::LDURSHWi:
4451   case AArch64::LDURSBXi:
4452   case AArch64::LDURSHXi:
4453   case AArch64::LDURSWi:
4454   case AArch64::LDRSBWui:
4455   case AArch64::LDRSHWui:
4456   case AArch64::LDRSBXui:
4457   case AArch64::LDRSHXui:
4458   case AArch64::LDRSWui:
4459   case AArch64::LDRSBWroX:
4460   case AArch64::LDRSHWroX:
4461   case AArch64::LDRSBXroX:
4462   case AArch64::LDRSHXroX:
4463   case AArch64::LDRSWroX:
4464   case AArch64::LDRSBWroW:
4465   case AArch64::LDRSHWroW:
4466   case AArch64::LDRSBXroW:
4467   case AArch64::LDRSHXroW:
4468   case AArch64::LDRSWroW:
4469     return true;
4470   }
4471 }
4472 
optimizeIntExtLoad(const Instruction * I,MVT RetVT,MVT SrcVT)4473 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4474                                          MVT SrcVT) {
4475   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4476   if (!LI || !LI->hasOneUse())
4477     return false;
4478 
4479   // Check if the load instruction has already been selected.
4480   unsigned Reg = lookUpRegForValue(LI);
4481   if (!Reg)
4482     return false;
4483 
4484   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4485   if (!MI)
4486     return false;
4487 
4488   // Check if the correct load instruction has been emitted - SelectionDAG might
4489   // have emitted a zero-extending load, but we need a sign-extending load.
4490   bool IsZExt = isa<ZExtInst>(I);
4491   const auto *LoadMI = MI;
4492   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4493       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4494     unsigned LoadReg = MI->getOperand(1).getReg();
4495     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4496     assert(LoadMI && "Expected valid instruction");
4497   }
4498   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4499     return false;
4500 
4501   // Nothing to be done.
4502   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4503     updateValueMap(I, Reg);
4504     return true;
4505   }
4506 
4507   if (IsZExt) {
4508     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4509     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4510             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4511         .addImm(0)
4512         .addReg(Reg, getKillRegState(true))
4513         .addImm(AArch64::sub_32);
4514     Reg = Reg64;
4515   } else {
4516     assert((MI->getOpcode() == TargetOpcode::COPY &&
4517             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4518            "Expected copy instruction");
4519     Reg = MI->getOperand(1).getReg();
4520     MachineBasicBlock::iterator I(MI);
4521     removeDeadCode(I, std::next(I));
4522   }
4523   updateValueMap(I, Reg);
4524   return true;
4525 }
4526 
selectIntExt(const Instruction * I)4527 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4528   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4529          "Unexpected integer extend instruction.");
4530   MVT RetVT;
4531   MVT SrcVT;
4532   if (!isTypeSupported(I->getType(), RetVT))
4533     return false;
4534 
4535   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4536     return false;
4537 
4538   // Try to optimize already sign-/zero-extended values from load instructions.
4539   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4540     return true;
4541 
4542   unsigned SrcReg = getRegForValue(I->getOperand(0));
4543   if (!SrcReg)
4544     return false;
4545   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4546 
4547   // Try to optimize already sign-/zero-extended values from function arguments.
4548   bool IsZExt = isa<ZExtInst>(I);
4549   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4550     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4551       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4552         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4553         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4554                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4555             .addImm(0)
4556             .addReg(SrcReg, getKillRegState(SrcIsKill))
4557             .addImm(AArch64::sub_32);
4558         SrcReg = ResultReg;
4559       }
4560       // Conservatively clear all kill flags from all uses, because we are
4561       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4562       // level. The result of the instruction at IR level might have been
4563       // trivially dead, which is now not longer true.
4564       unsigned UseReg = lookUpRegForValue(I);
4565       if (UseReg)
4566         MRI.clearKillFlags(UseReg);
4567 
4568       updateValueMap(I, SrcReg);
4569       return true;
4570     }
4571   }
4572 
4573   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4574   if (!ResultReg)
4575     return false;
4576 
4577   updateValueMap(I, ResultReg);
4578   return true;
4579 }
4580 
selectRem(const Instruction * I,unsigned ISDOpcode)4581 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4582   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4583   if (!DestEVT.isSimple())
4584     return false;
4585 
4586   MVT DestVT = DestEVT.getSimpleVT();
4587   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4588     return false;
4589 
4590   unsigned DivOpc;
4591   bool Is64bit = (DestVT == MVT::i64);
4592   switch (ISDOpcode) {
4593   default:
4594     return false;
4595   case ISD::SREM:
4596     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4597     break;
4598   case ISD::UREM:
4599     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4600     break;
4601   }
4602   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4603   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4604   if (!Src0Reg)
4605     return false;
4606   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4607 
4608   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4609   if (!Src1Reg)
4610     return false;
4611   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4612 
4613   const TargetRegisterClass *RC =
4614       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4615   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4616                                      Src1Reg, /*IsKill=*/false);
4617   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4618   // The remainder is computed as numerator - (quotient * denominator) using the
4619   // MSUB instruction.
4620   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4621                                         Src1Reg, Src1IsKill, Src0Reg,
4622                                         Src0IsKill);
4623   updateValueMap(I, ResultReg);
4624   return true;
4625 }
4626 
selectMul(const Instruction * I)4627 bool AArch64FastISel::selectMul(const Instruction *I) {
4628   MVT VT;
4629   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4630     return false;
4631 
4632   if (VT.isVector())
4633     return selectBinaryOp(I, ISD::MUL);
4634 
4635   const Value *Src0 = I->getOperand(0);
4636   const Value *Src1 = I->getOperand(1);
4637   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4638     if (C->getValue().isPowerOf2())
4639       std::swap(Src0, Src1);
4640 
4641   // Try to simplify to a shift instruction.
4642   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4643     if (C->getValue().isPowerOf2()) {
4644       uint64_t ShiftVal = C->getValue().logBase2();
4645       MVT SrcVT = VT;
4646       bool IsZExt = true;
4647       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4648         if (!isIntExtFree(ZExt)) {
4649           MVT VT;
4650           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4651             SrcVT = VT;
4652             IsZExt = true;
4653             Src0 = ZExt->getOperand(0);
4654           }
4655         }
4656       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4657         if (!isIntExtFree(SExt)) {
4658           MVT VT;
4659           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4660             SrcVT = VT;
4661             IsZExt = false;
4662             Src0 = SExt->getOperand(0);
4663           }
4664         }
4665       }
4666 
4667       unsigned Src0Reg = getRegForValue(Src0);
4668       if (!Src0Reg)
4669         return false;
4670       bool Src0IsKill = hasTrivialKill(Src0);
4671 
4672       unsigned ResultReg =
4673           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4674 
4675       if (ResultReg) {
4676         updateValueMap(I, ResultReg);
4677         return true;
4678       }
4679     }
4680 
4681   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4682   if (!Src0Reg)
4683     return false;
4684   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4685 
4686   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4687   if (!Src1Reg)
4688     return false;
4689   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4690 
4691   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4692 
4693   if (!ResultReg)
4694     return false;
4695 
4696   updateValueMap(I, ResultReg);
4697   return true;
4698 }
4699 
selectShift(const Instruction * I)4700 bool AArch64FastISel::selectShift(const Instruction *I) {
4701   MVT RetVT;
4702   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4703     return false;
4704 
4705   if (RetVT.isVector())
4706     return selectOperator(I, I->getOpcode());
4707 
4708   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4709     unsigned ResultReg = 0;
4710     uint64_t ShiftVal = C->getZExtValue();
4711     MVT SrcVT = RetVT;
4712     bool IsZExt = I->getOpcode() != Instruction::AShr;
4713     const Value *Op0 = I->getOperand(0);
4714     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4715       if (!isIntExtFree(ZExt)) {
4716         MVT TmpVT;
4717         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4718           SrcVT = TmpVT;
4719           IsZExt = true;
4720           Op0 = ZExt->getOperand(0);
4721         }
4722       }
4723     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4724       if (!isIntExtFree(SExt)) {
4725         MVT TmpVT;
4726         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4727           SrcVT = TmpVT;
4728           IsZExt = false;
4729           Op0 = SExt->getOperand(0);
4730         }
4731       }
4732     }
4733 
4734     unsigned Op0Reg = getRegForValue(Op0);
4735     if (!Op0Reg)
4736       return false;
4737     bool Op0IsKill = hasTrivialKill(Op0);
4738 
4739     switch (I->getOpcode()) {
4740     default: llvm_unreachable("Unexpected instruction.");
4741     case Instruction::Shl:
4742       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4743       break;
4744     case Instruction::AShr:
4745       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4746       break;
4747     case Instruction::LShr:
4748       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4749       break;
4750     }
4751     if (!ResultReg)
4752       return false;
4753 
4754     updateValueMap(I, ResultReg);
4755     return true;
4756   }
4757 
4758   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4759   if (!Op0Reg)
4760     return false;
4761   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4762 
4763   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4764   if (!Op1Reg)
4765     return false;
4766   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4767 
4768   unsigned ResultReg = 0;
4769   switch (I->getOpcode()) {
4770   default: llvm_unreachable("Unexpected instruction.");
4771   case Instruction::Shl:
4772     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4773     break;
4774   case Instruction::AShr:
4775     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4776     break;
4777   case Instruction::LShr:
4778     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4779     break;
4780   }
4781 
4782   if (!ResultReg)
4783     return false;
4784 
4785   updateValueMap(I, ResultReg);
4786   return true;
4787 }
4788 
selectBitCast(const Instruction * I)4789 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4790   MVT RetVT, SrcVT;
4791 
4792   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4793     return false;
4794   if (!isTypeLegal(I->getType(), RetVT))
4795     return false;
4796 
4797   unsigned Opc;
4798   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4799     Opc = AArch64::FMOVWSr;
4800   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4801     Opc = AArch64::FMOVXDr;
4802   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4803     Opc = AArch64::FMOVSWr;
4804   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4805     Opc = AArch64::FMOVDXr;
4806   else
4807     return false;
4808 
4809   const TargetRegisterClass *RC = nullptr;
4810   switch (RetVT.SimpleTy) {
4811   default: llvm_unreachable("Unexpected value type.");
4812   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4813   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4814   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4815   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4816   }
4817   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4818   if (!Op0Reg)
4819     return false;
4820   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4821   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4822 
4823   if (!ResultReg)
4824     return false;
4825 
4826   updateValueMap(I, ResultReg);
4827   return true;
4828 }
4829 
selectFRem(const Instruction * I)4830 bool AArch64FastISel::selectFRem(const Instruction *I) {
4831   MVT RetVT;
4832   if (!isTypeLegal(I->getType(), RetVT))
4833     return false;
4834 
4835   RTLIB::Libcall LC;
4836   switch (RetVT.SimpleTy) {
4837   default:
4838     return false;
4839   case MVT::f32:
4840     LC = RTLIB::REM_F32;
4841     break;
4842   case MVT::f64:
4843     LC = RTLIB::REM_F64;
4844     break;
4845   }
4846 
4847   ArgListTy Args;
4848   Args.reserve(I->getNumOperands());
4849 
4850   // Populate the argument list.
4851   for (auto &Arg : I->operands()) {
4852     ArgListEntry Entry;
4853     Entry.Val = Arg;
4854     Entry.Ty = Arg->getType();
4855     Args.push_back(Entry);
4856   }
4857 
4858   CallLoweringInfo CLI;
4859   MCContext &Ctx = MF->getContext();
4860   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4861                 TLI.getLibcallName(LC), std::move(Args));
4862   if (!lowerCallTo(CLI))
4863     return false;
4864   updateValueMap(I, CLI.ResultReg);
4865   return true;
4866 }
4867 
selectSDiv(const Instruction * I)4868 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4869   MVT VT;
4870   if (!isTypeLegal(I->getType(), VT))
4871     return false;
4872 
4873   if (!isa<ConstantInt>(I->getOperand(1)))
4874     return selectBinaryOp(I, ISD::SDIV);
4875 
4876   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4877   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4878       !(C.isPowerOf2() || (-C).isPowerOf2()))
4879     return selectBinaryOp(I, ISD::SDIV);
4880 
4881   unsigned Lg2 = C.countTrailingZeros();
4882   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4883   if (!Src0Reg)
4884     return false;
4885   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4886 
4887   if (cast<BinaryOperator>(I)->isExact()) {
4888     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4889     if (!ResultReg)
4890       return false;
4891     updateValueMap(I, ResultReg);
4892     return true;
4893   }
4894 
4895   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4896   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4897   if (!AddReg)
4898     return false;
4899 
4900   // (Src0 < 0) ? Pow2 - 1 : 0;
4901   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4902     return false;
4903 
4904   unsigned SelectOpc;
4905   const TargetRegisterClass *RC;
4906   if (VT == MVT::i64) {
4907     SelectOpc = AArch64::CSELXr;
4908     RC = &AArch64::GPR64RegClass;
4909   } else {
4910     SelectOpc = AArch64::CSELWr;
4911     RC = &AArch64::GPR32RegClass;
4912   }
4913   unsigned SelectReg =
4914       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4915                        Src0IsKill, AArch64CC::LT);
4916   if (!SelectReg)
4917     return false;
4918 
4919   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4920   // negate the result.
4921   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4922   unsigned ResultReg;
4923   if (C.isNegative())
4924     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4925                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4926   else
4927     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4928 
4929   if (!ResultReg)
4930     return false;
4931 
4932   updateValueMap(I, ResultReg);
4933   return true;
4934 }
4935 
4936 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4937 /// have to duplicate it for AArch64, because otherwise we would fail during the
4938 /// sign-extend emission.
getRegForGEPIndex(const Value * Idx)4939 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4940   unsigned IdxN = getRegForValue(Idx);
4941   if (IdxN == 0)
4942     // Unhandled operand. Halt "fast" selection and bail.
4943     return std::pair<unsigned, bool>(0, false);
4944 
4945   bool IdxNIsKill = hasTrivialKill(Idx);
4946 
4947   // If the index is smaller or larger than intptr_t, truncate or extend it.
4948   MVT PtrVT = TLI.getPointerTy(DL);
4949   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4950   if (IdxVT.bitsLT(PtrVT)) {
4951     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4952     IdxNIsKill = true;
4953   } else if (IdxVT.bitsGT(PtrVT))
4954     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4955   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4956 }
4957 
4958 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4959 /// duplicate it for AArch64, because otherwise we would bail out even for
4960 /// simple cases. This is because the standard fastEmit functions don't cover
4961 /// MUL at all and ADD is lowered very inefficientily.
selectGetElementPtr(const Instruction * I)4962 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4963   unsigned N = getRegForValue(I->getOperand(0));
4964   if (!N)
4965     return false;
4966   bool NIsKill = hasTrivialKill(I->getOperand(0));
4967 
4968   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4969   // into a single N = N + TotalOffset.
4970   uint64_t TotalOffs = 0;
4971   MVT VT = TLI.getPointerTy(DL);
4972   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4973        GTI != E; ++GTI) {
4974     const Value *Idx = GTI.getOperand();
4975     if (auto *StTy = GTI.getStructTypeOrNull()) {
4976       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4977       // N = N + Offset
4978       if (Field)
4979         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4980     } else {
4981       Type *Ty = GTI.getIndexedType();
4982 
4983       // If this is a constant subscript, handle it quickly.
4984       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4985         if (CI->isZero())
4986           continue;
4987         // N = N + Offset
4988         TotalOffs +=
4989             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4990         continue;
4991       }
4992       if (TotalOffs) {
4993         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4994         if (!N)
4995           return false;
4996         NIsKill = true;
4997         TotalOffs = 0;
4998       }
4999 
5000       // N = N + Idx * ElementSize;
5001       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5002       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5003       unsigned IdxN = Pair.first;
5004       bool IdxNIsKill = Pair.second;
5005       if (!IdxN)
5006         return false;
5007 
5008       if (ElementSize != 1) {
5009         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5010         if (!C)
5011           return false;
5012         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5013         if (!IdxN)
5014           return false;
5015         IdxNIsKill = true;
5016       }
5017       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5018       if (!N)
5019         return false;
5020     }
5021   }
5022   if (TotalOffs) {
5023     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5024     if (!N)
5025       return false;
5026   }
5027   updateValueMap(I, N);
5028   return true;
5029 }
5030 
selectAtomicCmpXchg(const AtomicCmpXchgInst * I)5031 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5032   assert(TM.getOptLevel() == CodeGenOpt::None &&
5033          "cmpxchg survived AtomicExpand at optlevel > -O0");
5034 
5035   auto *RetPairTy = cast<StructType>(I->getType());
5036   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5037   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5038          "cmpxchg has a non-i1 status result");
5039 
5040   MVT VT;
5041   if (!isTypeLegal(RetTy, VT))
5042     return false;
5043 
5044   const TargetRegisterClass *ResRC;
5045   unsigned Opc, CmpOpc;
5046   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5047   // extractvalue selection doesn't support that.
5048   if (VT == MVT::i32) {
5049     Opc = AArch64::CMP_SWAP_32;
5050     CmpOpc = AArch64::SUBSWrs;
5051     ResRC = &AArch64::GPR32RegClass;
5052   } else if (VT == MVT::i64) {
5053     Opc = AArch64::CMP_SWAP_64;
5054     CmpOpc = AArch64::SUBSXrs;
5055     ResRC = &AArch64::GPR64RegClass;
5056   } else {
5057     return false;
5058   }
5059 
5060   const MCInstrDesc &II = TII.get(Opc);
5061 
5062   const unsigned AddrReg = constrainOperandRegClass(
5063       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5064   const unsigned DesiredReg = constrainOperandRegClass(
5065       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5066   const unsigned NewReg = constrainOperandRegClass(
5067       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5068 
5069   const unsigned ResultReg1 = createResultReg(ResRC);
5070   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5071   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5072 
5073   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5074   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5075       .addDef(ResultReg1)
5076       .addDef(ScratchReg)
5077       .addUse(AddrReg)
5078       .addUse(DesiredReg)
5079       .addUse(NewReg);
5080 
5081   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5082       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5083       .addUse(ResultReg1)
5084       .addUse(DesiredReg)
5085       .addImm(0);
5086 
5087   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5088       .addDef(ResultReg2)
5089       .addUse(AArch64::WZR)
5090       .addUse(AArch64::WZR)
5091       .addImm(AArch64CC::NE);
5092 
5093   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5094   updateValueMap(I, ResultReg1, 2);
5095   return true;
5096 }
5097 
fastSelectInstruction(const Instruction * I)5098 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5099   switch (I->getOpcode()) {
5100   default:
5101     break;
5102   case Instruction::Add:
5103   case Instruction::Sub:
5104     return selectAddSub(I);
5105   case Instruction::Mul:
5106     return selectMul(I);
5107   case Instruction::SDiv:
5108     return selectSDiv(I);
5109   case Instruction::SRem:
5110     if (!selectBinaryOp(I, ISD::SREM))
5111       return selectRem(I, ISD::SREM);
5112     return true;
5113   case Instruction::URem:
5114     if (!selectBinaryOp(I, ISD::UREM))
5115       return selectRem(I, ISD::UREM);
5116     return true;
5117   case Instruction::Shl:
5118   case Instruction::LShr:
5119   case Instruction::AShr:
5120     return selectShift(I);
5121   case Instruction::And:
5122   case Instruction::Or:
5123   case Instruction::Xor:
5124     return selectLogicalOp(I);
5125   case Instruction::Br:
5126     return selectBranch(I);
5127   case Instruction::IndirectBr:
5128     return selectIndirectBr(I);
5129   case Instruction::BitCast:
5130     if (!FastISel::selectBitCast(I))
5131       return selectBitCast(I);
5132     return true;
5133   case Instruction::FPToSI:
5134     if (!selectCast(I, ISD::FP_TO_SINT))
5135       return selectFPToInt(I, /*Signed=*/true);
5136     return true;
5137   case Instruction::FPToUI:
5138     return selectFPToInt(I, /*Signed=*/false);
5139   case Instruction::ZExt:
5140   case Instruction::SExt:
5141     return selectIntExt(I);
5142   case Instruction::Trunc:
5143     if (!selectCast(I, ISD::TRUNCATE))
5144       return selectTrunc(I);
5145     return true;
5146   case Instruction::FPExt:
5147     return selectFPExt(I);
5148   case Instruction::FPTrunc:
5149     return selectFPTrunc(I);
5150   case Instruction::SIToFP:
5151     if (!selectCast(I, ISD::SINT_TO_FP))
5152       return selectIntToFP(I, /*Signed=*/true);
5153     return true;
5154   case Instruction::UIToFP:
5155     return selectIntToFP(I, /*Signed=*/false);
5156   case Instruction::Load:
5157     return selectLoad(I);
5158   case Instruction::Store:
5159     return selectStore(I);
5160   case Instruction::FCmp:
5161   case Instruction::ICmp:
5162     return selectCmp(I);
5163   case Instruction::Select:
5164     return selectSelect(I);
5165   case Instruction::Ret:
5166     return selectRet(I);
5167   case Instruction::FRem:
5168     return selectFRem(I);
5169   case Instruction::GetElementPtr:
5170     return selectGetElementPtr(I);
5171   case Instruction::AtomicCmpXchg:
5172     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5173   }
5174 
5175   // Silence warnings.
5176   (void)&CC_AArch64_DarwinPCS_VarArg;
5177   (void)&CC_AArch64_Win64_VarArg;
5178 
5179   // fall-back to target-independent instruction selection.
5180   return selectOperator(I, I->getOpcode());
5181 }
5182 
5183 namespace llvm {
5184 
createFastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186                                         const TargetLibraryInfo *LibInfo) {
5187   return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189 
5190 } // end namespace llvm
5191