//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
class AArch64FastISel final : public FastISel {
  /// Abstraction over the addressing modes FastISel can emit loads/stores
  /// with: either a base register or a frame index, optionally combined with
  /// an immediate offset, a (possibly extended/shifted) offset register, and
  /// a global value.
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    // Discriminator for the Base union below.
    BaseKind Kind = RegBase;
    // Extend/shift applied to OffsetReg (e.g. LSL/UXTW/SXTW), if any.
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    // Optional register offset; 0 means "no offset register".
    unsigned OffsetReg = 0;
    // Left-shift amount applied to OffsetReg.
    unsigned Shift = 0;
    // Immediate byte offset added to the base.
    int64_t Offset = 0;
    // Set when the address refers to a global value.
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index  access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  // FastISel entry points overridden from the target-independent base class.
  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.  The _rr/_ri/_rs/_rx suffixes follow the AArch64
  // operand forms: register-register, register-immediate, register-shifted
  // register, and register-extended register respectively.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true,  bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                          AArch64_AM::ShiftExtendType ExtType,
                          uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  // Constant materialization helpers; each returns the result register or 0
  // on failure.
  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

// Tablegen-generated fast-path selection routines.
#include "AArch64GenFastISel.inc"
};
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310          "Unexpected integer extend instruction.");
311   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312          "Unexpected value type.");
313   bool IsZExt = isa<ZExtInst>(I);
314 
315   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316     if (LI->hasOneUse())
317       return true;
318 
319   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321       return true;
322 
323   return false;
324 }
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329   switch (VT.SimpleTy) {
330   default:
331     return 0;    // invalid
332   case MVT::i1:  // fall-through
333   case MVT::i8:
334     return 1;
335   case MVT::i16:
336     return 2;
337   case MVT::i32: // fall-through
338   case MVT::f32:
339     return 4;
340   case MVT::i64: // fall-through
341   case MVT::f64:
342     return 8;
343   }
344 }
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347   if (CC == CallingConv::WebKit_JS)
348     return CC_AArch64_WebKit_JS;
349   if (CC == CallingConv::GHC)
350     return CC_AArch64_GHC;
351   if (CC == CallingConv::CFGuard_Check)
352     return CC_AArch64_Win64_CFGuard_Check;
353   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
354 }
355 
356 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
357   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
358          "Alloca should always return a pointer.");
359 
360   // Don't handle dynamic allocas.
361   if (!FuncInfo.StaticAllocaMap.count(AI))
362     return 0;
363 
364   DenseMap<const AllocaInst *, int>::iterator SI =
365       FuncInfo.StaticAllocaMap.find(AI);
366 
367   if (SI != FuncInfo.StaticAllocaMap.end()) {
368     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
369     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
370             ResultReg)
371         .addFrameIndex(SI->second)
372         .addImm(0)
373         .addImm(0);
374     return ResultReg;
375   }
376 
377   return 0;
378 }
379 
380 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
381   if (VT > MVT::i64)
382     return 0;
383 
384   if (!CI->isZero())
385     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
386 
387   // Create a copy from the zero register to materialize a "0" value.
388   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
389                                                    : &AArch64::GPR32RegClass;
390   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
391   unsigned ResultReg = createResultReg(RC);
392   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
393           ResultReg).addReg(ZeroReg, getKillRegState(true));
394   return ResultReg;
395 }
396 
397 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
398   // Positive zero (+0.0) has to be materialized with a fmov from the zero
399   // register, because the immediate version of fmov cannot encode zero.
400   if (CFP->isNullValue())
401     return fastMaterializeFloatZero(CFP);
402 
403   if (VT != MVT::f32 && VT != MVT::f64)
404     return 0;
405 
406   const APFloat Val = CFP->getValueAPF();
407   bool Is64Bit = (VT == MVT::f64);
408   // This checks to see if we can use FMOV instructions to materialize
409   // a constant, otherwise we have to materialize via the constant pool.
410   int Imm =
411       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
412   if (Imm != -1) {
413     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
414     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
415   }
416 
417   // For the MachO large code model materialize the FP constant in code.
418   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
419     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
420     const TargetRegisterClass *RC = Is64Bit ?
421         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
422 
423     unsigned TmpReg = createResultReg(RC);
424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
425         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
426 
427     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
428     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
429             TII.get(TargetOpcode::COPY), ResultReg)
430         .addReg(TmpReg, getKillRegState(true));
431 
432     return ResultReg;
433   }
434 
435   // Materialize via constant pool.  MachineConstantPool wants an explicit
436   // alignment.
437   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
438 
439   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
440   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447       .addReg(ADRPReg)
448       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449   return ResultReg;
450 }
451 
/// Materialize the address of a GlobalValue. Emits ADRP + LDR for
/// GOT-indirect references and ADRP + ADDXri for direct references.
/// Returns the result register, or 0 if the global can't be handled here.
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  // Ask the subtarget how this reference must be classified (e.g. whether it
  // has to go through the GOT).
  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX: load the global's address out of its GOT slot.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    // ILP32 GOT slots hold 32-bit pointers, so use a 32-bit load there.
    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
            ResultReg)
      .addReg(ADRPReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                        AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX: page address plus page offset yields the global's address
    // directly, no GOT indirection needed.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}
519 
520 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
521   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
522 
523   // Only handle simple types.
524   if (!CEVT.isSimple())
525     return 0;
526   MVT VT = CEVT.getSimpleVT();
527   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
528   // 'null' pointers need to have a somewhat special treatment.
529   if (isa<ConstantPointerNull>(C)) {
530     assert(VT == MVT::i64 && "Expected 64-bit pointers");
531     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
532   }
533 
534   if (const auto *CI = dyn_cast<ConstantInt>(C))
535     return materializeInt(CI, VT);
536   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
537     return materializeFP(CFP, VT);
538   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
539     return materializeGV(GV);
540 
541   return 0;
542 }
543 
544 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
545   assert(CFP->isNullValue() &&
546          "Floating-point constant is not a positive zero.");
547   MVT VT;
548   if (!isTypeLegal(CFP->getType(), VT))
549     return 0;
550 
551   if (VT != MVT::f32 && VT != MVT::f64)
552     return 0;
553 
554   bool Is64Bit = (VT == MVT::f64);
555   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
556   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
557   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
558 }
559 
560 /// Check if the multiply is by a power-of-2 constant.
561 static bool isMulPowOf2(const Value *I) {
562   if (const auto *MI = dyn_cast<MulOperator>(I)) {
563     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
564       if (C->getValue().isPowerOf2())
565         return true;
566     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
567       if (C->getValue().isPowerOf2())
568         return true;
569   }
570   return false;
571 }
572 
573 // Computes the address to get to an object.
574 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
575 {
576   const User *U = nullptr;
577   unsigned Opcode = Instruction::UserOp1;
578   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
579     // Don't walk into other basic blocks unless the object is an alloca from
580     // another block, otherwise it may not have a virtual register assigned.
581     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
582         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
583       Opcode = I->getOpcode();
584       U = I;
585     }
586   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
587     Opcode = C->getOpcode();
588     U = C;
589   }
590 
591   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
592     if (Ty->getAddressSpace() > 255)
593       // Fast instruction selection doesn't support the special
594       // address spaces.
595       return false;
596 
597   switch (Opcode) {
598   default:
599     break;
600   case Instruction::BitCast:
601     // Look through bitcasts.
602     return computeAddress(U->getOperand(0), Addr, Ty);
603 
604   case Instruction::IntToPtr:
605     // Look past no-op inttoptrs.
606     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
607         TLI.getPointerTy(DL))
608       return computeAddress(U->getOperand(0), Addr, Ty);
609     break;
610 
611   case Instruction::PtrToInt:
612     // Look past no-op ptrtoints.
613     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
614       return computeAddress(U->getOperand(0), Addr, Ty);
615     break;
616 
617   case Instruction::GetElementPtr: {
618     Address SavedAddr = Addr;
619     uint64_t TmpOffset = Addr.getOffset();
620 
621     // Iterate through the GEP folding the constants into offsets where
622     // we can.
623     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
624          GTI != E; ++GTI) {
625       const Value *Op = GTI.getOperand();
626       if (StructType *STy = GTI.getStructTypeOrNull()) {
627         const StructLayout *SL = DL.getStructLayout(STy);
628         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
629         TmpOffset += SL->getElementOffset(Idx);
630       } else {
631         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
632         while (true) {
633           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
634             // Constant-offset addressing.
635             TmpOffset += CI->getSExtValue() * S;
636             break;
637           }
638           if (canFoldAddIntoGEP(U, Op)) {
639             // A compatible add with a constant operand. Fold the constant.
640             ConstantInt *CI =
641                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
642             TmpOffset += CI->getSExtValue() * S;
643             // Iterate on the other operand.
644             Op = cast<AddOperator>(Op)->getOperand(0);
645             continue;
646           }
647           // Unsupported
648           goto unsupported_gep;
649         }
650       }
651     }
652 
653     // Try to grab the base operand now.
654     Addr.setOffset(TmpOffset);
655     if (computeAddress(U->getOperand(0), Addr, Ty))
656       return true;
657 
658     // We failed, restore everything and try the other options.
659     Addr = SavedAddr;
660 
661   unsupported_gep:
662     break;
663   }
664   case Instruction::Alloca: {
665     const AllocaInst *AI = cast<AllocaInst>(Obj);
666     DenseMap<const AllocaInst *, int>::iterator SI =
667         FuncInfo.StaticAllocaMap.find(AI);
668     if (SI != FuncInfo.StaticAllocaMap.end()) {
669       Addr.setKind(Address::FrameIndexBase);
670       Addr.setFI(SI->second);
671       return true;
672     }
673     break;
674   }
675   case Instruction::Add: {
676     // Adds of constants are common and easy enough.
677     const Value *LHS = U->getOperand(0);
678     const Value *RHS = U->getOperand(1);
679 
680     if (isa<ConstantInt>(LHS))
681       std::swap(LHS, RHS);
682 
683     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
684       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
685       return computeAddress(LHS, Addr, Ty);
686     }
687 
688     Address Backup = Addr;
689     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
690       return true;
691     Addr = Backup;
692 
693     break;
694   }
695   case Instruction::Sub: {
696     // Subs of constants are common and easy enough.
697     const Value *LHS = U->getOperand(0);
698     const Value *RHS = U->getOperand(1);
699 
700     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
702       return computeAddress(LHS, Addr, Ty);
703     }
704     break;
705   }
706   case Instruction::Shl: {
707     if (Addr.getOffsetReg())
708       break;
709 
710     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
711     if (!CI)
712       break;
713 
714     unsigned Val = CI->getZExtValue();
715     if (Val < 1 || Val > 3)
716       break;
717 
718     uint64_t NumBytes = 0;
719     if (Ty && Ty->isSized()) {
720       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
721       NumBytes = NumBits / 8;
722       if (!isPowerOf2_64(NumBits))
723         NumBytes = 0;
724     }
725 
726     if (NumBytes != (1ULL << Val))
727       break;
728 
729     Addr.setShift(Val);
730     Addr.setExtendType(AArch64_AM::LSL);
731 
732     const Value *Src = U->getOperand(0);
733     if (const auto *I = dyn_cast<Instruction>(Src)) {
734       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
735         // Fold the zext or sext when it won't become a noop.
736         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
737           if (!isIntExtFree(ZE) &&
738               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
739             Addr.setExtendType(AArch64_AM::UXTW);
740             Src = ZE->getOperand(0);
741           }
742         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
743           if (!isIntExtFree(SE) &&
744               SE->getOperand(0)->getType()->isIntegerTy(32)) {
745             Addr.setExtendType(AArch64_AM::SXTW);
746             Src = SE->getOperand(0);
747           }
748         }
749       }
750     }
751 
752     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
753       if (AI->getOpcode() == Instruction::And) {
754         const Value *LHS = AI->getOperand(0);
755         const Value *RHS = AI->getOperand(1);
756 
757         if (const auto *C = dyn_cast<ConstantInt>(LHS))
758           if (C->getValue() == 0xffffffff)
759             std::swap(LHS, RHS);
760 
761         if (const auto *C = dyn_cast<ConstantInt>(RHS))
762           if (C->getValue() == 0xffffffff) {
763             Addr.setExtendType(AArch64_AM::UXTW);
764             unsigned Reg = getRegForValue(LHS);
765             if (!Reg)
766               return false;
767             bool RegIsKill = hasTrivialKill(LHS);
768             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
769                                              AArch64::sub_32);
770             Addr.setOffsetReg(Reg);
771             return true;
772           }
773       }
774 
775     unsigned Reg = getRegForValue(Src);
776     if (!Reg)
777       return false;
778     Addr.setOffsetReg(Reg);
779     return true;
780   }
781   case Instruction::Mul: {
782     if (Addr.getOffsetReg())
783       break;
784 
785     if (!isMulPowOf2(U))
786       break;
787 
788     const Value *LHS = U->getOperand(0);
789     const Value *RHS = U->getOperand(1);
790 
791     // Canonicalize power-of-2 value to the RHS.
792     if (const auto *C = dyn_cast<ConstantInt>(LHS))
793       if (C->getValue().isPowerOf2())
794         std::swap(LHS, RHS);
795 
796     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
797     const auto *C = cast<ConstantInt>(RHS);
798     unsigned Val = C->getValue().logBase2();
799     if (Val < 1 || Val > 3)
800       break;
801 
802     uint64_t NumBytes = 0;
803     if (Ty && Ty->isSized()) {
804       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
805       NumBytes = NumBits / 8;
806       if (!isPowerOf2_64(NumBits))
807         NumBytes = 0;
808     }
809 
810     if (NumBytes != (1ULL << Val))
811       break;
812 
813     Addr.setShift(Val);
814     Addr.setExtendType(AArch64_AM::LSL);
815 
816     const Value *Src = LHS;
817     if (const auto *I = dyn_cast<Instruction>(Src)) {
818       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
819         // Fold the zext or sext when it won't become a noop.
820         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
821           if (!isIntExtFree(ZE) &&
822               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
823             Addr.setExtendType(AArch64_AM::UXTW);
824             Src = ZE->getOperand(0);
825           }
826         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
827           if (!isIntExtFree(SE) &&
828               SE->getOperand(0)->getType()->isIntegerTy(32)) {
829             Addr.setExtendType(AArch64_AM::SXTW);
830             Src = SE->getOperand(0);
831           }
832         }
833       }
834     }
835 
836     unsigned Reg = getRegForValue(Src);
837     if (!Reg)
838       return false;
839     Addr.setOffsetReg(Reg);
840     return true;
841   }
842   case Instruction::And: {
843     if (Addr.getOffsetReg())
844       break;
845 
846     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
847       break;
848 
849     const Value *LHS = U->getOperand(0);
850     const Value *RHS = U->getOperand(1);
851 
852     if (const auto *C = dyn_cast<ConstantInt>(LHS))
853       if (C->getValue() == 0xffffffff)
854         std::swap(LHS, RHS);
855 
856     if (const auto *C = dyn_cast<ConstantInt>(RHS))
857       if (C->getValue() == 0xffffffff) {
858         Addr.setShift(0);
859         Addr.setExtendType(AArch64_AM::LSL);
860         Addr.setExtendType(AArch64_AM::UXTW);
861 
862         unsigned Reg = getRegForValue(LHS);
863         if (!Reg)
864           return false;
865         bool RegIsKill = hasTrivialKill(LHS);
866         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
867                                          AArch64::sub_32);
868         Addr.setOffsetReg(Reg);
869         return true;
870       }
871     break;
872   }
873   case Instruction::SExt:
874   case Instruction::ZExt: {
875     if (!Addr.getReg() || Addr.getOffsetReg())
876       break;
877 
878     const Value *Src = nullptr;
879     // Fold the zext or sext when it won't become a noop.
880     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
881       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
882         Addr.setExtendType(AArch64_AM::UXTW);
883         Src = ZE->getOperand(0);
884       }
885     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
886       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
887         Addr.setExtendType(AArch64_AM::SXTW);
888         Src = SE->getOperand(0);
889       }
890     }
891 
892     if (!Src)
893       break;
894 
895     Addr.setShift(0);
896     unsigned Reg = getRegForValue(Src);
897     if (!Reg)
898       return false;
899     Addr.setOffsetReg(Reg);
900     return true;
901   }
902   } // end switch
903 
904   if (Addr.isRegBase() && !Addr.getReg()) {
905     unsigned Reg = getRegForValue(Obj);
906     if (!Reg)
907       return false;
908     Addr.setReg(Reg);
909     return true;
910   }
911 
912   if (!Addr.getOffsetReg()) {
913     unsigned Reg = getRegForValue(Obj);
914     if (!Reg)
915       return false;
916     Addr.setOffsetReg(Reg);
917     return true;
918   }
919 
920   return false;
921 }
922 
923 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
924   const User *U = nullptr;
925   unsigned Opcode = Instruction::UserOp1;
926   bool InMBB = true;
927 
928   if (const auto *I = dyn_cast<Instruction>(V)) {
929     Opcode = I->getOpcode();
930     U = I;
931     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
932   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
933     Opcode = C->getOpcode();
934     U = C;
935   }
936 
937   switch (Opcode) {
938   default: break;
939   case Instruction::BitCast:
940     // Look past bitcasts if its operand is in the same BB.
941     if (InMBB)
942       return computeCallAddress(U->getOperand(0), Addr);
943     break;
944   case Instruction::IntToPtr:
945     // Look past no-op inttoptrs if its operand is in the same BB.
946     if (InMBB &&
947         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
948             TLI.getPointerTy(DL))
949       return computeCallAddress(U->getOperand(0), Addr);
950     break;
951   case Instruction::PtrToInt:
952     // Look past no-op ptrtoints if its operand is in the same BB.
953     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
954       return computeCallAddress(U->getOperand(0), Addr);
955     break;
956   }
957 
958   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
959     Addr.setGlobalValue(GV);
960     return true;
961   }
962 
963   // If all else fails, try to materialize the value in a register.
964   if (!Addr.getGlobalValue()) {
965     Addr.setReg(getRegForValue(V));
966     return Addr.getReg() != 0;
967   }
968 
969   return false;
970 }
971 
972 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
973   EVT evt = TLI.getValueType(DL, Ty, true);
974 
975   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
976     return false;
977 
978   // Only handle simple types.
979   if (evt == MVT::Other || !evt.isSimple())
980     return false;
981   VT = evt.getSimpleVT();
982 
983   // This is a legal type, but it's not something we handle in fast-isel.
984   if (VT == MVT::f128)
985     return false;
986 
987   // Handle all other legal types, i.e. a register that will directly hold this
988   // value.
989   return TLI.isTypeLegal(VT);
990 }
991 
992 /// Determine if the value type is supported by FastISel.
993 ///
994 /// FastISel for AArch64 can handle more value types than are legal. This adds
995 /// simple value type such as i1, i8, and i16.
996 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
997   if (Ty->isVectorTy() && !IsVectorAllowed)
998     return false;
999 
1000   if (isTypeLegal(Ty, VT))
1001     return true;
1002 
1003   // If this is a type than can be sign or zero-extended to a basic operation
1004   // go ahead and accept it now.
1005   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1006     return true;
1007 
1008   return false;
1009 }
1010 
1011 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1012   if (!isa<Instruction>(V))
1013     return true;
1014 
1015   const auto *I = cast<Instruction>(V);
1016   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1017 }
1018 
/// Rewrite \p Addr into a form the AArch64 load/store instructions can encode
/// for an access of type \p VT, emitting scratch instructions as needed.
/// Returns false if the address cannot be simplified.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  // Decide whether the immediate offset fits one of the two encodings: the
  // signed 9-bit unscaled form, or the unsigned 12-bit scaled form.
  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    // ADDXri with a frame index materializes the stack slot's address.
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    // Fold the offset register (with any extend/shift) into the base via an
    // explicit add, leaving a plain register base.
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW   )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: materialize the (extended and shifted) offset
      // register as the new base.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
1114 
/// Append the address (and memory) operands for a load/store to \p MIB.
/// \p Addr must already be in an encodable form (see simplifyAddress) and
/// \p ScaleFactor is the implicit scale of the access type.
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  // The instruction encodes the offset in units of the access size.
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    // For a store, operand 0 is the value being stored, so the base register
    // sits one operand later than for a load.
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      // Register-offset form: base, offset register, sign-extension flag, and
      // whether the offset register is shifted by the access size.
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
1154 
/// Emit an integer add or subtract, folding extends, shifts, power-of-two
/// multiplies, and immediates into the instruction where possible.
///
/// \p UseAdd selects add vs. subtract; \p SetFlags requests the flag-setting
/// (ADDS/SUBS) form; \p WantResult indicates whether the value (not just the
/// flags) is needed; \p IsZExt selects zero- vs. sign-extension for
/// sub-32-bit types. Returns the result register, or 0 on failure.
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult,  bool IsZExt) {
  // i1/i8/i16 operands need extending to 32 bits first; remember the extend
  // the extended-register (rx) form could fold for i8/i16.
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  // Do the arithmetic in at least 32 bits; SrcVT keeps the original width.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr   )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // Try the immediate form first for a constant RHS.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      // Flip add<->sub so the encoded immediate is positive.
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // A small shl of the RHS can be folded into the extended-register form.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      // mul x, 2^n folds to a shifted-register operand (LSL #n).
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  // Nothing folded; fall back to a plain register-register add/sub.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
1308 
1309 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1310                                         bool LHSIsKill, unsigned RHSReg,
1311                                         bool RHSIsKill, bool SetFlags,
1312                                         bool WantResult) {
1313   assert(LHSReg && RHSReg && "Invalid register number.");
1314 
1315   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1316       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1317     return 0;
1318 
1319   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1320     return 0;
1321 
1322   static const unsigned OpcTable[2][2][2] = {
1323     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1324       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1325     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1326       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1327   };
1328   bool Is64Bit = RetVT == MVT::i64;
1329   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1330   const TargetRegisterClass *RC =
1331       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1332   unsigned ResultReg;
1333   if (WantResult)
1334     ResultReg = createResultReg(RC);
1335   else
1336     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1337 
1338   const MCInstrDesc &II = TII.get(Opc);
1339   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1340   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1341   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1342       .addReg(LHSReg, getKillRegState(LHSIsKill))
1343       .addReg(RHSReg, getKillRegState(RHSIsKill));
1344   return ResultReg;
1345 }
1346 
1347 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1348                                         bool LHSIsKill, uint64_t Imm,
1349                                         bool SetFlags, bool WantResult) {
1350   assert(LHSReg && "Invalid register number.");
1351 
1352   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1353     return 0;
1354 
1355   unsigned ShiftImm;
1356   if (isUInt<12>(Imm))
1357     ShiftImm = 0;
1358   else if ((Imm & 0xfff000) == Imm) {
1359     ShiftImm = 12;
1360     Imm >>= 12;
1361   } else
1362     return 0;
1363 
1364   static const unsigned OpcTable[2][2][2] = {
1365     { { AArch64::SUBWri,  AArch64::SUBXri  },
1366       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1367     { { AArch64::SUBSWri, AArch64::SUBSXri },
1368       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1369   };
1370   bool Is64Bit = RetVT == MVT::i64;
1371   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1372   const TargetRegisterClass *RC;
1373   if (SetFlags)
1374     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1375   else
1376     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1377   unsigned ResultReg;
1378   if (WantResult)
1379     ResultReg = createResultReg(RC);
1380   else
1381     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1382 
1383   const MCInstrDesc &II = TII.get(Opc);
1384   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1385   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1386       .addReg(LHSReg, getKillRegState(LHSIsKill))
1387       .addImm(Imm)
1388       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1389   return ResultReg;
1390 }
1391 
1392 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1393                                         bool LHSIsKill, unsigned RHSReg,
1394                                         bool RHSIsKill,
1395                                         AArch64_AM::ShiftExtendType ShiftType,
1396                                         uint64_t ShiftImm, bool SetFlags,
1397                                         bool WantResult) {
1398   assert(LHSReg && RHSReg && "Invalid register number.");
1399   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1400          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1401 
1402   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1403     return 0;
1404 
1405   // Don't deal with undefined shifts.
1406   if (ShiftImm >= RetVT.getSizeInBits())
1407     return 0;
1408 
1409   static const unsigned OpcTable[2][2][2] = {
1410     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1411       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1412     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1413       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1414   };
1415   bool Is64Bit = RetVT == MVT::i64;
1416   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1417   const TargetRegisterClass *RC =
1418       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1419   unsigned ResultReg;
1420   if (WantResult)
1421     ResultReg = createResultReg(RC);
1422   else
1423     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1424 
1425   const MCInstrDesc &II = TII.get(Opc);
1426   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1427   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1428   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1429       .addReg(LHSReg, getKillRegState(LHSIsKill))
1430       .addReg(RHSReg, getKillRegState(RHSIsKill))
1431       .addImm(getShifterImm(ShiftType, ShiftImm));
1432   return ResultReg;
1433 }
1434 
1435 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1436                                         bool LHSIsKill, unsigned RHSReg,
1437                                         bool RHSIsKill,
1438                                         AArch64_AM::ShiftExtendType ExtType,
1439                                         uint64_t ShiftImm, bool SetFlags,
1440                                         bool WantResult) {
1441   assert(LHSReg && RHSReg && "Invalid register number.");
1442   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1443          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1444 
1445   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1446     return 0;
1447 
1448   if (ShiftImm >= 4)
1449     return 0;
1450 
1451   static const unsigned OpcTable[2][2][2] = {
1452     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1453       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1454     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1455       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1456   };
1457   bool Is64Bit = RetVT == MVT::i64;
1458   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1459   const TargetRegisterClass *RC = nullptr;
1460   if (SetFlags)
1461     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1462   else
1463     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1464   unsigned ResultReg;
1465   if (WantResult)
1466     ResultReg = createResultReg(RC);
1467   else
1468     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1469 
1470   const MCInstrDesc &II = TII.get(Opc);
1471   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1472   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1473   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1474       .addReg(LHSReg, getKillRegState(LHSIsKill))
1475       .addReg(RHSReg, getKillRegState(RHSIsKill))
1476       .addImm(getArithExtendImm(ExtType, ShiftImm));
1477   return ResultReg;
1478 }
1479 
1480 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1481   Type *Ty = LHS->getType();
1482   EVT EVT = TLI.getValueType(DL, Ty, true);
1483   if (!EVT.isSimple())
1484     return false;
1485   MVT VT = EVT.getSimpleVT();
1486 
1487   switch (VT.SimpleTy) {
1488   default:
1489     return false;
1490   case MVT::i1:
1491   case MVT::i8:
1492   case MVT::i16:
1493   case MVT::i32:
1494   case MVT::i64:
1495     return emitICmp(VT, LHS, RHS, IsZExt);
1496   case MVT::f32:
1497   case MVT::f64:
1498     return emitFCmp(VT, LHS, RHS);
1499   }
1500 }
1501 
1502 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1503                                bool IsZExt) {
1504   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1505                  IsZExt) != 0;
1506 }
1507 
1508 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1509                                   uint64_t Imm) {
1510   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1511                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1512 }
1513 
1514 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1515   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1516     return false;
1517 
1518   // Check to see if the 2nd operand is a constant that we can encode directly
1519   // in the compare.
1520   bool UseImm = false;
1521   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1522     if (CFP->isZero() && !CFP->isNegative())
1523       UseImm = true;
1524 
1525   unsigned LHSReg = getRegForValue(LHS);
1526   if (!LHSReg)
1527     return false;
1528   bool LHSIsKill = hasTrivialKill(LHS);
1529 
1530   if (UseImm) {
1531     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1532     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1533         .addReg(LHSReg, getKillRegState(LHSIsKill));
1534     return true;
1535   }
1536 
1537   unsigned RHSReg = getRegForValue(RHS);
1538   if (!RHSReg)
1539     return false;
1540   bool RHSIsKill = hasTrivialKill(RHS);
1541 
1542   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1543   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1544       .addReg(LHSReg, getKillRegState(LHSIsKill))
1545       .addReg(RHSReg, getKillRegState(RHSIsKill));
1546   return true;
1547 }
1548 
1549 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1550                                   bool SetFlags, bool WantResult, bool IsZExt) {
1551   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1552                     IsZExt);
1553 }
1554 
1555 /// This method is a wrapper to simplify add emission.
1556 ///
1557 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1558 /// that fails, then try to materialize the immediate into a register and use
1559 /// emitAddSub_rr instead.
1560 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1561                                       int64_t Imm) {
1562   unsigned ResultReg;
1563   if (Imm < 0)
1564     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1565   else
1566     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1567 
1568   if (ResultReg)
1569     return ResultReg;
1570 
1571   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1572   if (!CReg)
1573     return 0;
1574 
1575   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1576   return ResultReg;
1577 }
1578 
1579 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1580                                   bool SetFlags, bool WantResult, bool IsZExt) {
1581   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1582                     IsZExt);
1583 }
1584 
1585 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1586                                       bool LHSIsKill, unsigned RHSReg,
1587                                       bool RHSIsKill, bool WantResult) {
1588   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1589                        RHSIsKill, /*SetFlags=*/true, WantResult);
1590 }
1591 
1592 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1593                                       bool LHSIsKill, unsigned RHSReg,
1594                                       bool RHSIsKill,
1595                                       AArch64_AM::ShiftExtendType ShiftType,
1596                                       uint64_t ShiftImm, bool WantResult) {
1597   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1598                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1599                        WantResult);
1600 }
1601 
// Emit a logical operation (AND/OR/XOR, selected by ISDOpc) on two IR values,
// folding an immediate, a power-of-2 multiply, or a shift-by-constant into the
// instruction where possible. Returns the result register, or 0 on failure.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  // First, try to fold the RHS as a logical immediate.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Put the power-of-2 constant on the multiply's RHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // A multiply by 2^N is an LSL by N, which the shifted-register form of
      // the logical instruction can encode directly.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  // Nothing could be folded; fall back to a plain register-register operation.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  // Sub-32-bit types are operated on as i32.
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    // Mask the result back down to the narrow type's width.
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1687                                            unsigned LHSReg, bool LHSIsKill,
1688                                            uint64_t Imm) {
1689   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690                 "ISD nodes are not consecutive!");
1691   static const unsigned OpcTable[3][2] = {
1692     { AArch64::ANDWri, AArch64::ANDXri },
1693     { AArch64::ORRWri, AArch64::ORRXri },
1694     { AArch64::EORWri, AArch64::EORXri }
1695   };
1696   const TargetRegisterClass *RC;
1697   unsigned Opc;
1698   unsigned RegSize;
1699   switch (RetVT.SimpleTy) {
1700   default:
1701     return 0;
1702   case MVT::i1:
1703   case MVT::i8:
1704   case MVT::i16:
1705   case MVT::i32: {
1706     unsigned Idx = ISDOpc - ISD::AND;
1707     Opc = OpcTable[Idx][0];
1708     RC = &AArch64::GPR32spRegClass;
1709     RegSize = 32;
1710     break;
1711   }
1712   case MVT::i64:
1713     Opc = OpcTable[ISDOpc - ISD::AND][1];
1714     RC = &AArch64::GPR64spRegClass;
1715     RegSize = 64;
1716     break;
1717   }
1718 
1719   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1720     return 0;
1721 
1722   unsigned ResultReg =
1723       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1724                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1725   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1726     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1727     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1728   }
1729   return ResultReg;
1730 }
1731 
1732 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1733                                            unsigned LHSReg, bool LHSIsKill,
1734                                            unsigned RHSReg, bool RHSIsKill,
1735                                            uint64_t ShiftImm) {
1736   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1737                 "ISD nodes are not consecutive!");
1738   static const unsigned OpcTable[3][2] = {
1739     { AArch64::ANDWrs, AArch64::ANDXrs },
1740     { AArch64::ORRWrs, AArch64::ORRXrs },
1741     { AArch64::EORWrs, AArch64::EORXrs }
1742   };
1743 
1744   // Don't deal with undefined shifts.
1745   if (ShiftImm >= RetVT.getSizeInBits())
1746     return 0;
1747 
1748   const TargetRegisterClass *RC;
1749   unsigned Opc;
1750   switch (RetVT.SimpleTy) {
1751   default:
1752     return 0;
1753   case MVT::i1:
1754   case MVT::i8:
1755   case MVT::i16:
1756   case MVT::i32:
1757     Opc = OpcTable[ISDOpc - ISD::AND][0];
1758     RC = &AArch64::GPR32RegClass;
1759     break;
1760   case MVT::i64:
1761     Opc = OpcTable[ISDOpc - ISD::AND][1];
1762     RC = &AArch64::GPR64RegClass;
1763     break;
1764   }
1765   unsigned ResultReg =
1766       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1767                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1768   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1769     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1770     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1771   }
1772   return ResultReg;
1773 }
1774 
1775 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1776                                      uint64_t Imm) {
1777   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1778 }
1779 
// Emit a load of type VT from Addr, extended (zero- or sign-, per WantZExt)
// into a register suitable for RetVT. Returns the result register, or 0 if
// the load could not be emitted.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Integer load opcodes, indexed as
  //   GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][log2(byte width)].
  // The rows come in pairs (32-bit dest, 64-bit dest) for each addressing
  // mode: unscaled imm, scaled imm, reg-offset (X ext), reg-offset (W ext).
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // Floating-point load opcodes, indexed as FPOpcTable[Idx][IsF64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  // The register-offset forms are only usable with no immediate offset.
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++; // Select the W-register extended variant.

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling: mask the result down to one bit.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
1916 
1917 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1918   MVT VT;
1919   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1920     return false;
1921 
1922   if (VT.isVector())
1923     return selectOperator(I, I->getOpcode());
1924 
1925   unsigned ResultReg;
1926   switch (I->getOpcode()) {
1927   default:
1928     llvm_unreachable("Unexpected instruction.");
1929   case Instruction::Add:
1930     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1931     break;
1932   case Instruction::Sub:
1933     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1934     break;
1935   }
1936   if (!ResultReg)
1937     return false;
1938 
1939   updateValueMap(I, ResultReg);
1940   return true;
1941 }
1942 
1943 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1944   MVT VT;
1945   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1946     return false;
1947 
1948   if (VT.isVector())
1949     return selectOperator(I, I->getOpcode());
1950 
1951   unsigned ResultReg;
1952   switch (I->getOpcode()) {
1953   default:
1954     llvm_unreachable("Unexpected instruction.");
1955   case Instruction::And:
1956     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1957     break;
1958   case Instruction::Or:
1959     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1960     break;
1961   case Instruction::Xor:
1962     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1963     break;
1964   }
1965   if (!ResultReg)
1966     return false;
1967 
1968   updateValueMap(I, ResultReg);
1969   return true;
1970 }
1971 
// Select a load instruction, optionally folding a following sign-/zero-extend
// into the load itself.
bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further.  Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction. Only a
  // single-use extend with a supported destination type qualifies; IntExtVal
  // records the extend whose value map entry the load result will serve.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
    // it when it selects the integer extend.
    unsigned Reg = lookUpRegForValue(IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG)
          // and recover the 32-bit load result it wrapped.
          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
          ResultReg = std::prev(I)->getOperand(0).getReg();
          removeDeadCode(I, std::next(I));
        } else
          // For a pending sign-extend, hand out the 32-bit subregister view.
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 /*IsKill=*/true,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the instructions
    // that have been emitted by the integer extend lowering code and use the
    // result from the load instruction directly.
    while (MI) {
      Reg = 0;
      // Walk the chain: each deleted instruction's first register operand is
      // the next candidate definition to delete.
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      MachineBasicBlock::iterator I(MI);
      removeDeadCode(I, std::next(I));
      MI = nullptr;
      if (Reg)
        MI = MRI.getUniqueVRegDef(Reg);
    }
    // Map the extend (not the load) to the load's result register.
    updateValueMap(IntExtVal, ResultReg);
    return true;
  }

  updateValueMap(I, ResultReg);
  return true;
}
2083 
2084 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2085                                        unsigned AddrReg,
2086                                        MachineMemOperand *MMO) {
2087   unsigned Opc;
2088   switch (VT.SimpleTy) {
2089   default: return false;
2090   case MVT::i8:  Opc = AArch64::STLRB; break;
2091   case MVT::i16: Opc = AArch64::STLRH; break;
2092   case MVT::i32: Opc = AArch64::STLRW; break;
2093   case MVT::i64: Opc = AArch64::STLRX; break;
2094   }
2095 
2096   const MCInstrDesc &II = TII.get(Opc);
2097   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2098   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2099   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2100       .addReg(SrcReg)
2101       .addReg(AddrReg)
2102       .addMemOperand(MMO);
2103   return true;
2104 }
2105 
// Emit a store of SrcReg (of type VT) to Addr. Returns false if the address
// or type cannot be handled.
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return false;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Store opcodes, indexed as OpcTable[Idx][type]: one row per addressing
  // mode (unscaled imm, scaled imm, reg-offset X ext, reg-offset W ext).
  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }
  };

  unsigned Opc;
  bool VTIsi1 = false;
  // The register-offset forms are only usable with no immediate offset.
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++; // Select the W-register extended variant.

  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH; // i1 is stored as a byte.
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
  }

  // Storing an i1 requires special handling: mask the source down to one bit
  // first (the zero register is already in range and needs no masking).
  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  const MCInstrDesc &II = TII.get(Opc);
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);

  return true;
}
2173 
2174 bool AArch64FastISel::selectStore(const Instruction *I) {
2175   MVT VT;
2176   const Value *Op0 = I->getOperand(0);
2177   // Verify we have a legal type before going any further.  Currently, we handle
2178   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2179   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2180   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2181     return false;
2182 
2183   const Value *PtrV = I->getOperand(1);
2184   if (TLI.supportSwiftError()) {
2185     // Swifterror values can come from either a function parameter with
2186     // swifterror attribute or an alloca with swifterror attribute.
2187     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2188       if (Arg->hasSwiftErrorAttr())
2189         return false;
2190     }
2191 
2192     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2193       if (Alloca->isSwiftError())
2194         return false;
2195     }
2196   }
2197 
2198   // Get the value to be stored into a register. Use the zero register directly
2199   // when possible to avoid an unnecessary copy and a wasted register.
2200   unsigned SrcReg = 0;
2201   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2202     if (CI->isZero())
2203       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2204   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2205     if (CF->isZero() && !CF->isNegative()) {
2206       VT = MVT::getIntegerVT(VT.getSizeInBits());
2207       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2208     }
2209   }
2210 
2211   if (!SrcReg)
2212     SrcReg = getRegForValue(Op0);
2213 
2214   if (!SrcReg)
2215     return false;
2216 
2217   auto *SI = cast<StoreInst>(I);
2218 
2219   // Try to emit a STLR for seq_cst/release.
2220   if (SI->isAtomic()) {
2221     AtomicOrdering Ord = SI->getOrdering();
2222     // The non-atomic instructions are sufficient for relaxed stores.
2223     if (isReleaseOrStronger(Ord)) {
2224       // The STLR addressing mode only supports a base reg; pass that directly.
2225       unsigned AddrReg = getRegForValue(PtrV);
2226       return emitStoreRelease(VT, SrcReg, AddrReg,
2227                               createMachineMemOperandFor(I));
2228     }
2229   }
2230 
2231   // See if we can handle this address.
2232   Address Addr;
2233   if (!computeAddress(PtrV, Addr, Op0->getType()))
2234     return false;
2235 
2236   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2237     return false;
2238   return true;
2239 }
2240 
2241 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2242   switch (Pred) {
2243   case CmpInst::FCMP_ONE:
2244   case CmpInst::FCMP_UEQ:
2245   default:
2246     // AL is our "false" for now. The other two need more compares.
2247     return AArch64CC::AL;
2248   case CmpInst::ICMP_EQ:
2249   case CmpInst::FCMP_OEQ:
2250     return AArch64CC::EQ;
2251   case CmpInst::ICMP_SGT:
2252   case CmpInst::FCMP_OGT:
2253     return AArch64CC::GT;
2254   case CmpInst::ICMP_SGE:
2255   case CmpInst::FCMP_OGE:
2256     return AArch64CC::GE;
2257   case CmpInst::ICMP_UGT:
2258   case CmpInst::FCMP_UGT:
2259     return AArch64CC::HI;
2260   case CmpInst::FCMP_OLT:
2261     return AArch64CC::MI;
2262   case CmpInst::ICMP_ULE:
2263   case CmpInst::FCMP_OLE:
2264     return AArch64CC::LS;
2265   case CmpInst::FCMP_ORD:
2266     return AArch64CC::VC;
2267   case CmpInst::FCMP_UNO:
2268     return AArch64CC::VS;
2269   case CmpInst::FCMP_UGE:
2270     return AArch64CC::PL;
2271   case CmpInst::ICMP_SLT:
2272   case CmpInst::FCMP_ULT:
2273     return AArch64CC::LT;
2274   case CmpInst::ICMP_SLE:
2275   case CmpInst::FCMP_ULE:
2276     return AArch64CC::LE;
2277   case CmpInst::FCMP_UNE:
2278   case CmpInst::ICMP_NE:
2279     return AArch64CC::NE;
2280   case CmpInst::ICMP_UGE:
2281     return AArch64CC::HS;
2282   case CmpInst::ICMP_ULT:
2283     return AArch64CC::LO;
2284   }
2285 }
2286 
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))
    return false;

  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  MVT VT;
  if (!isTypeSupported(LHS->getType(), VT))
    return false;

  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Try to take advantage of fallthrough opportunities.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Predicate = CmpInst::getInversePredicate(Predicate);
  }

  // TestBit >= 0 selects a TB(N)Z bit test; TestBit == -1 selects a CB(N)Z
  // compare against zero.
  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    // Only comparisons against zero qualify; canonicalize the zero to RHS.
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
      std::swap(LHS, RHS);

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    // (x & 2^N) ==/!= 0  -->  test bit N of x directly.
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        const Value *AndLHS = AI->getOperand(0);
        const Value *AndRHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(AndLHS, AndRHS);

        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }

    // An i1 only has one interesting bit.
    if (VT == MVT::i1)
      TestBit = 0;

    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    // x < 0 iff the sign bit is set; x >= 0 iff it is clear.
    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    if (!isa<ConstantInt>(RHS))
      return false;

    // Only comparisons against -1 map onto a sign-bit test.
    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
      return false;

    // x > -1 iff the sign bit is clear; x <= -1 iff it is set.
    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  } // end switch

  // Indexed by [IsBitTest][IsCmpNE][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }
  };

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  // The W-register form of TB(N)Z can test bits 0-31 even of a 64-bit value.
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opc);

  unsigned SrcReg = getRegForValue(LHS);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(LHS);

  // A 64-bit value tested with a W-form instruction needs the 32-bit subreg.
  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                        AArch64::sub_32);

  // CB(N)Z on a sub-32-bit value needs the upper bits zeroed first.
  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);

  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
          .addReg(SrcReg, getKillRegState(SrcIsKill));
  if (IsBitTest)
    MIB.addImm(TestBit);
  MIB.addMBB(TBB);

  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
2414 
// Lower a branch instruction. For conditional branches this tries, in order:
// folding an always-true/always-false compare, a combined compare-and-branch
// (via emitCompareAndBranch), a direct branch on a constant condition,
// reusing the flags of a preceding XALU intrinsic, and finally a TB(N)Z on
// bit 0 of the materialized condition value.
bool AArch64FastISel::selectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  // Unconditional branches only need the successor block.
  if (BI->isUnconditional()) {
    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    fastEmitBranch(MSucc, BI->getDebugLoc());
    return true;
  }

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // Only fold the compare when this branch is its sole user and the value
    // is available in this block.
    if (CI->hasOneUse() && isValueAvailable(CI)) {
      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_FALSE:
        // Compare is always false: branch straight to the false block.
        fastEmitBranch(FBB, DbgLoc);
        return true;
      case CmpInst::FCMP_TRUE:
        // Compare is always true: branch straight to the true block.
        fastEmitBranch(TBB, DbgLoc);
        return true;
      }

      // Try to emit a combined compare-and-branch first.
      if (emitCompareAndBranch(BI))
        return true;

      // Try to take advantage of fallthrough opportunities: if the true
      // block is the layout successor, branch on the inverted condition to
      // the false block and fall through to the true block.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // Emit the cmp.
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
      // instruction.
      AArch64CC::CondCode CC = getCompareCC(Predicate);
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_UEQ:
        ExtraCC = AArch64CC::EQ;
        CC = AArch64CC::VS;
        break;
      case CmpInst::FCMP_ONE:
        ExtraCC = AArch64CC::MI;
        CC = AArch64CC::GT;
        break;
      }
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");

      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
      if (ExtraCC != AArch64CC::AL) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
            .addImm(ExtraCC)
            .addMBB(TBB);
      }

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: emit an unconditional branch to whichever
    // successor is actually taken.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);

    // Obtain the branch probability and add the target to the successor list.
    if (FuncInfo.BPI) {
      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
          BI->getParent(), Target->getBasicBlock());
      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    } else
      FuncInfo.MBB->addSuccessorWithoutProb(Target);
    return true;
  } else {
    AArch64CC::CondCode CC = AArch64CC::NE;
    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
      // Fake request the condition, otherwise the intrinsic might be completely
      // optimized away.
      unsigned CondReg = getRegForValue(BI->getCondition());
      if (!CondReg)
        return false;

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
        .addImm(CC)
        .addMBB(TBB);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  }

  // Fallback: materialize the condition value and branch on its low bit.
  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;
  bool CondRegIsKill = hasTrivialKill(BI->getCondition());

  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
  unsigned Opcode = AArch64::TBNZW;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Opcode = AArch64::TBZW;
  }

  const MCInstrDesc &II = TII.get(Opcode);
  unsigned ConstrainedCondReg
    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
      .addImm(0)
      .addMBB(TBB);

  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
2544 
2545 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2546   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2547   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2548   if (AddrReg == 0)
2549     return false;
2550 
2551   // Emit the indirect branch.
2552   const MCInstrDesc &II = TII.get(AArch64::BR);
2553   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2554   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2555 
2556   // Make sure the CFG is up-to-date.
2557   for (auto *Succ : BI->successors())
2558     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2559 
2560   return true;
2561 }
2562 
// Lower an integer or floating-point compare into a 0/1 value in a GPR32,
// materialized with CSINC off the flags set by the compare.
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Vectors of i1 are weird: bail out.
  if (CI->getType()->isVectorTy())
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_FALSE:
    // Always false: copy WZR (constant zero) into the result.
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(AArch64::WZR, getKillRegState(true));
    break;
  case CmpInst::FCMP_TRUE:
    // Always true: materialize the constant 1.
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    break;
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  // Emit the cmp.
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
  // condition codes are inverted, because they are used by CSINC.
  static unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  unsigned *CondCodes = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_UEQ:
    CondCodes = &CondCodeTable[0][0];
    break;
  case CmpInst::FCMP_ONE:
    CondCodes = &CondCodeTable[1][0];
    break;
  }

  if (CondCodes) {
    // Two-step materialization: the first CSINC produces one sub-condition,
    // the second CSINC merges it with the other sub-condition into the
    // final 0/1 result.
    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            TmpReg1)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            ResultReg)
        .addReg(TmpReg1, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[1]);

    updateValueMap(I, ResultReg);
    return true;
  }

  // Now set a register based on the comparison.
  AArch64CC::CondCode CC = getCompareCC(Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  // CSINC WZR, WZR, <inverted cc> yields 1 exactly when the original
  // condition holds, 0 otherwise.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, ResultReg);
  return true;
}
2646 
2647 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2648 /// value.
2649 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2650   if (!SI->getType()->isIntegerTy(1))
2651     return false;
2652 
2653   const Value *Src1Val, *Src2Val;
2654   unsigned Opc = 0;
2655   bool NeedExtraOp = false;
2656   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2657     if (CI->isOne()) {
2658       Src1Val = SI->getCondition();
2659       Src2Val = SI->getFalseValue();
2660       Opc = AArch64::ORRWrr;
2661     } else {
2662       assert(CI->isZero());
2663       Src1Val = SI->getFalseValue();
2664       Src2Val = SI->getCondition();
2665       Opc = AArch64::BICWrr;
2666     }
2667   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2668     if (CI->isOne()) {
2669       Src1Val = SI->getCondition();
2670       Src2Val = SI->getTrueValue();
2671       Opc = AArch64::ORRWrr;
2672       NeedExtraOp = true;
2673     } else {
2674       assert(CI->isZero());
2675       Src1Val = SI->getCondition();
2676       Src2Val = SI->getTrueValue();
2677       Opc = AArch64::ANDWrr;
2678     }
2679   }
2680 
2681   if (!Opc)
2682     return false;
2683 
2684   unsigned Src1Reg = getRegForValue(Src1Val);
2685   if (!Src1Reg)
2686     return false;
2687   bool Src1IsKill = hasTrivialKill(Src1Val);
2688 
2689   unsigned Src2Reg = getRegForValue(Src2Val);
2690   if (!Src2Reg)
2691     return false;
2692   bool Src2IsKill = hasTrivialKill(Src2Val);
2693 
2694   if (NeedExtraOp) {
2695     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2696     Src1IsKill = true;
2697   }
2698   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2699                                        Src1IsKill, Src2Reg, Src2IsKill);
2700   updateValueMap(SI, ResultReg);
2701   return true;
2702 }
2703 
// Lower a select instruction to CSEL/FCSEL. The condition is obtained, in
// order of preference, from: the optimizeSelect i1 shortcut, the flags of a
// preceding XALU intrinsic, a foldable single-use compare, or a TST of the
// materialized condition value.
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  MVT VT;
  if (!isTypeSupported(I->getType(), VT))
    return false;

  // Pick the conditional-select opcode and register class for the result
  // type. Sub-i32 integers use the 32-bit form.
  unsigned Opc;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;
    break;
  }

  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  AArch64CC::CondCode CC = AArch64CC::NE;
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;

  // i1 selects with a constant operand reduce to a single logical op.
  if (optimizeSelect(SI))
    return true;

  // Try to pickup the flags, so we don't have to emit another compare.
  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition to force emission of the XALU intrinsic.
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
             isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);
    // Try to optimize or fold the cmp.
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    const Value *FoldSelect = nullptr;
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_FALSE:
      // Always-false condition: the select is just its false value.
      FoldSelect = SI->getFalseValue();
      break;
    case CmpInst::FCMP_TRUE:
      // Always-true condition: the select is just its true value.
      FoldSelect = SI->getTrueValue();
      break;
    }

    if (FoldSelect) {
      unsigned SrcReg = getRegForValue(FoldSelect);
      if (!SrcReg)
        return false;
      // The folded value now has an extra use; drop any stale kill flags.
      unsigned UseReg = lookUpRegForValue(SI);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }

    // Emit the cmp.
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
      return false;

    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    CC = getCompareCC(Predicate);
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_UEQ:
      ExtraCC = AArch64CC::EQ;
      CC = AArch64CC::VS;
      break;
    case CmpInst::FCMP_ONE:
      ExtraCC = AArch64CC::MI;
      CC = AArch64CC::GT;
      break;
    }
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  } else {
    // Generic condition: materialize it and set the flags with a TST.
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    CondReg = constrainOperandRegClass(II, CondReg, 1);

    // Emit a TST instruction (ANDS wzr, reg, #imm).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
            AArch64::WZR)
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  }

  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());

  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());

  if (!Src1Reg || !Src2Reg)
    return false;

  // For FCMP_UEQ/FCMP_ONE chain two conditional selects: the first handles
  // the extra condition, the second the primary one.
  if (ExtraCC != AArch64CC::AL) {
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                               Src2IsKill, ExtraCC);
    Src2IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                                        Src2IsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}
2833 
2834 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2835   Value *V = I->getOperand(0);
2836   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2837     return false;
2838 
2839   unsigned Op = getRegForValue(V);
2840   if (Op == 0)
2841     return false;
2842 
2843   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2844   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2845           ResultReg).addReg(Op);
2846   updateValueMap(I, ResultReg);
2847   return true;
2848 }
2849 
2850 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2851   Value *V = I->getOperand(0);
2852   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2853     return false;
2854 
2855   unsigned Op = getRegForValue(V);
2856   if (Op == 0)
2857     return false;
2858 
2859   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2860   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2861           ResultReg).addReg(Op);
2862   updateValueMap(I, ResultReg);
2863   return true;
2864 }
2865 
2866 // FPToUI and FPToSI
2867 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2868   MVT DestVT;
2869   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2870     return false;
2871 
2872   unsigned SrcReg = getRegForValue(I->getOperand(0));
2873   if (SrcReg == 0)
2874     return false;
2875 
2876   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2877   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2878     return false;
2879 
2880   unsigned Opc;
2881   if (SrcVT == MVT::f64) {
2882     if (Signed)
2883       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2884     else
2885       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2886   } else {
2887     if (Signed)
2888       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2889     else
2890       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2891   }
2892   unsigned ResultReg = createResultReg(
2893       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2894   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2895       .addReg(SrcReg);
2896   updateValueMap(I, ResultReg);
2897   return true;
2898 }
2899 
2900 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2901   MVT DestVT;
2902   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2903     return false;
2904   // Let regular ISEL handle FP16
2905   if (DestVT == MVT::f16)
2906     return false;
2907 
2908   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2909          "Unexpected value type.");
2910 
2911   unsigned SrcReg = getRegForValue(I->getOperand(0));
2912   if (!SrcReg)
2913     return false;
2914   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2915 
2916   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2917 
2918   // Handle sign-extension.
2919   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2920     SrcReg =
2921         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2922     if (!SrcReg)
2923       return false;
2924     SrcIsKill = true;
2925   }
2926 
2927   unsigned Opc;
2928   if (SrcVT == MVT::i64) {
2929     if (Signed)
2930       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2931     else
2932       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2933   } else {
2934     if (Signed)
2935       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2936     else
2937       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2938   }
2939 
2940   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2941                                       SrcIsKill);
2942   updateValueMap(I, ResultReg);
2943   return true;
2944 }
2945 
// Fast-path argument lowering: handles simple C/Swift calling-convention
// functions whose arguments all fit in up to 8 GPRs and 8 FPRs. Bails out
// (returns false) for anything requiring real calling-convention analysis,
// letting the generic lowering take over.
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Swift)
    return false;

  if (Subtarget->hasCustomCallingConv())
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    // Attributes that change how an argument is passed disqualify the
    // fast path.
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    // Vector arguments need NEON and are only handled little-endian.
    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Physical argument registers, indexed by register class row (W/X/H/S/D/Q)
  // and argument slot (0-7).
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };

  // Second pass: assign each argument its physical register and create a
  // live-in virtual register for it.
  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      // Sub-i32 integers are passed in W registers.
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT ==  MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
3056 
// Lower the outgoing arguments of a call: run calling-convention analysis,
// emit CALLSEQ_START, then copy register arguments into their assigned
// physregs and store stack arguments relative to SP.
//
// \param CLI      the call being lowered (argument values and flags).
// \param OutVTs   legalized type of each outgoing argument, parallel to
//                 CLI.OutVals.
// \param NumBytes [out] stack space consumed by the arguments.
// \returns false if any argument cannot be handled here.
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0);

  // Process the args.
  for (CCValAssign &VA : ArgLocs) {
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
    // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      // On big-endian targets, small arguments occupy the high bytes of
      // their 8-byte stack slot, so offset the store accordingly.
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      Align Alignment = DL.getABITypeAlign(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);

      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}
3146 
// Finish lowering a call: emit CALLSEQ_END and, for non-void calls, copy the
// single return value out of its physical register into a fresh virtual
// register recorded on CLI. Returns false for multi-register or big-endian
// vector results, which are not handled here.
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

    // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;

    // Copy all of the result registers out of their specified physreg.
    MVT CopyVT = RVLocs[0].getValVT();

    // TODO: Handle big-endian results
    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(RVLocs[0].getLocReg());
    CLI.InRegs.push_back(RVLocs[0].getLocReg());

    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}
3185 
// Fast-path call lowering. Handles simple direct and indirect calls under
// the small-addressing and (MachO-only) large code models; bails out for
// tail calls, varargs, ILP32, vector/oversized arguments, and unusual
// argument attributes so SelectionDAG can lower them instead.
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC  = CLI.CallConv;
  bool IsTailCall     = CLI.IsTailCall;
  bool IsVarArg       = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // FIXME: we could and should support this, but for now correctness at -O0 is
  // more important.
  if (Subtarget->isTargetILP32())
    return false;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small-addressing and large code models.
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  // Arguments with ABI-modifying attributes are not handled here.
  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
        Flag.isSwiftSelf() || Flag.isSwiftError())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (Callee && !computeCallAddress(Callee, Addr))
    return false;

  // The weak function target may be zero; in that case we must use indirect
  // addressing via a stub on windows as it may be out of range for a
  // PC-relative jump.
  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
      Addr.getGlobalValue()->hasExternalWeakLinkage())
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  if (RegInfo->isAnyArgRegReserved(*MF))
    RegInfo->emitReservedArgRegCallError(*MF);

  // Issue the call.
  MachineInstrBuilder MIB;
  if (Subtarget->useSmallAddressing()) {
    // Small addressing: a direct BL (or BLR for a register callee) suffices.
    const MCInstrDesc &II =
        TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    if (Symbol)
      MIB.addSym(Symbol, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg()) {
      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
      MIB.addReg(Reg);
    } else
      return false;
  } else {
    // Large code model: materialize the callee address into a register
    // (via the GOT for symbols) and emit an indirect call.
    unsigned CallReg = 0;
    if (Symbol) {
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
              ADRPReg)
          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::LDRXui), CallReg)
          .addReg(ADRPReg)
          .addSym(Symbol,
                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
    CallReg = constrainOperandRegClass(II, CallReg, 0);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
}
3322 
3323 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3324   if (Alignment)
3325     return Len / Alignment <= 4;
3326   else
3327     return Len < 32;
3328 }
3329 
3330 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3331                                          uint64_t Len, unsigned Alignment) {
3332   // Make sure we don't bloat code by inlining very large memcpy's.
3333   if (!isMemCpySmall(Len, Alignment))
3334     return false;
3335 
3336   int64_t UnscaledOffset = 0;
3337   Address OrigDest = Dest;
3338   Address OrigSrc = Src;
3339 
3340   while (Len) {
3341     MVT VT;
3342     if (!Alignment || Alignment >= 8) {
3343       if (Len >= 8)
3344         VT = MVT::i64;
3345       else if (Len >= 4)
3346         VT = MVT::i32;
3347       else if (Len >= 2)
3348         VT = MVT::i16;
3349       else {
3350         VT = MVT::i8;
3351       }
3352     } else {
3353       // Bound based on alignment.
3354       if (Len >= 4 && Alignment == 4)
3355         VT = MVT::i32;
3356       else if (Len >= 2 && Alignment == 2)
3357         VT = MVT::i16;
3358       else {
3359         VT = MVT::i8;
3360       }
3361     }
3362 
3363     unsigned ResultReg = emitLoad(VT, VT, Src);
3364     if (!ResultReg)
3365       return false;
3366 
3367     if (!emitStore(VT, ResultReg, Dest))
3368       return false;
3369 
3370     int64_t Size = VT.getSizeInBits() / 8;
3371     Len -= Size;
3372     UnscaledOffset += Size;
3373 
3374     // We need to recompute the unscaled offset for each iteration.
3375     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3376     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3377   }
3378 
3379   return true;
3380 }
3381 
3382 /// Check if it is possible to fold the condition from the XALU intrinsic
3383 /// into the user. The condition code will only be updated on success.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  // The condition must be the overflow flag extracted (via extractvalue)
  // from the struct returned by a *.with.overflow intrinsic.
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  // Element 0 of the returned struct is the arithmetic result.
  Type *RetTy =
  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Only 32- and 64-bit overflow arithmetic is supported here.
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  // Canonicalize immediate to the RHS.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
    std::swap(LHS, RHS);

  // Simplify multiplies: x * 2 overflows exactly when x + x does, and the
  // add form maps onto a cheaper flag-setting instruction.
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::smul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
    break;
  case Intrinsic::umul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;
    break;
  }

  // Map each intrinsic to the AArch64 condition code that is set when the
  // corresponding operation overflows.
  AArch64CC::CondCode TmpCC;
  switch (IID) {
  default:
    return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
    TmpCC = AArch64CC::VS;
    break;
  case Intrinsic::uadd_with_overflow:
    TmpCC = AArch64CC::HS;
    break;
  case Intrinsic::usub_with_overflow:
    TmpCC = AArch64CC::LO;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    TmpCC = AArch64CC::NE;
    break;
  }

  // Check if both instructions are in the same basic block.
  if (!isValueAvailable(II))
    return false;

  // Make sure nothing is in the way: walk backwards from the user to the
  // intrinsic; any intervening instruction other than an extractvalue of
  // this same intrinsic could clobber NZCV, so give up in that case.
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Only update the caller's condition code on success.
  CC = TmpCC;
  return true;
}
3471 
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, /*IsKill=*/true, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::sponentry: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();

    // SP = FP + Fixed Object + 16
    int FI = MFI.CreateFixedObject(4, 0, false);
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    // ADDXri ResultReg, <fixed object>, #0: materialize the address of the
    // fixed stack object, which corresponds to SP at function entry.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::ADDXri), ResultReg)
            .addFrameIndex(FI)
            .addImm(0)
            .addImm(0);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      unsigned Alignment = MinAlign(MTI->getDestAlignment(),
                                    MTI->getSourceAlignment());
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
          return false;
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // The libcall expects a 64-bit length argument.
    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    // Fall back to a libcall, dropping the trailing isvolatile argument.
    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    // The libcall expects a 64-bit length argument.
    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    // Lower to a memset libcall, dropping the trailing isvolatile argument.
    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    // These are lowered to runtime library calls (sinf/sin/cosf/cos/powf/pow).
    MVT RetVT;
    if (!isTypeLegal(II->getType(), RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;

    // Indexed by [intrinsic][is-f64].
    static const RTLIB::Libcall LibCallTable[3][2] = {
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }
    };
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(II->getNumArgOperands());

    // Populate the argument list.
    for (auto &Arg : II->arg_operands()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);
    }

    CallLoweringInfo CLI;
    MCContext &Ctx = MF->getContext();
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(II, CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    MVT VT;
    if (!isTypeLegal(II->getType(), VT))
      return false;

    // Select the FP absolute-value instruction for the operand width.
    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      return false;
    case MVT::f32:
      Opc = AArch64::FABSSr;
      break;
    case MVT::f64:
      Opc = AArch64::FABSDr;
      break;
    }
    unsigned SrcReg = getRegForValue(II->getOperand(0));
    if (!SrcReg)
      return false;
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::trap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  case Intrinsic::debugtrap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(0xF000);
    return true;

  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    // Element 0 of the returned struct is the arithmetic result; element 1
    // is the overflow flag.
    Type *RetTy = Ty->getTypeAtIndex(0U);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
      std::swap(LHS, RHS);

    // Simplify multiplies: x * 2 overflows exactly when x + x does, and the
    // add form sets the flags directly.
    Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // 32-bit case: widen to a 64-bit multiply, then compare the high
        // half against the sign-extension of the low half; a mismatch
        // means the 32-bit product overflowed.
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
                                       /*IsKill=*/false, 32);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        // Compare the high 64 bits against the sign-extension of the low
        // 64 bits; a mismatch means overflow.
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // 32-bit case: widen to a 64-bit multiply; any nonzero bits in the
        // high half mean the unsigned 32-bit product overflowed.
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
                    /*WantResult=*/false);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        // A nonzero high half of the 128-bit product means overflow.
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
                    /*IsKill=*/false, /*WantResult=*/false);
      }
      break;
    }
    }

    // The multiply paths produce their arithmetic result in MulReg; copy it
    // into a fresh result register.
    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    if (!ResultReg1)
      return false;

    // Materialize the overflow flag: CSINC with the inverted condition
    // yields 1 when CC holds and 0 otherwise.
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
                                  /*IsKill=*/true, getInvertedCondCode(CC));
    (void)ResultReg2;
    // updateValueMap with NumRegs == 2 assumes the two result registers are
    // consecutive virtual registers.
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  }
  return false;
}
3830 
bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  // Bail out on cases this fast path does not handle: vararg functions,
  // swifterror results, and split callee-saved-register functions.
  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
                                                     : RetCC_AArch64_AAPCS;
    CCInfo.AnalyzeReturn(Outs, RetCC);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      // Only i1/i8/i16 values with an explicit zext/sext attribute are
      // widened here; anything else falls back to SelectionDAG.
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // Emit the return, with the return-value registers as implicit uses so
  // they stay live up to the return.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}
3936 
3937 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3938   Type *DestTy = I->getType();
3939   Value *Op = I->getOperand(0);
3940   Type *SrcTy = Op->getType();
3941 
3942   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3943   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3944   if (!SrcEVT.isSimple())
3945     return false;
3946   if (!DestEVT.isSimple())
3947     return false;
3948 
3949   MVT SrcVT = SrcEVT.getSimpleVT();
3950   MVT DestVT = DestEVT.getSimpleVT();
3951 
3952   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3953       SrcVT != MVT::i8)
3954     return false;
3955   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3956       DestVT != MVT::i1)
3957     return false;
3958 
3959   unsigned SrcReg = getRegForValue(Op);
3960   if (!SrcReg)
3961     return false;
3962   bool SrcIsKill = hasTrivialKill(Op);
3963 
3964   // If we're truncating from i64 to a smaller non-legal type then generate an
3965   // AND. Otherwise, we know the high bits are undefined and a truncate only
3966   // generate a COPY. We cannot mark the source register also as result
3967   // register, because this can incorrectly transfer the kill flag onto the
3968   // source register.
3969   unsigned ResultReg;
3970   if (SrcVT == MVT::i64) {
3971     uint64_t Mask = 0;
3972     switch (DestVT.SimpleTy) {
3973     default:
3974       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3975       return false;
3976     case MVT::i1:
3977       Mask = 0x1;
3978       break;
3979     case MVT::i8:
3980       Mask = 0xff;
3981       break;
3982     case MVT::i16:
3983       Mask = 0xffff;
3984       break;
3985     }
3986     // Issue an extract_subreg to get the lower 32-bits.
3987     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3988                                                 AArch64::sub_32);
3989     // Create the AND instruction which performs the actual truncation.
3990     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3991     assert(ResultReg && "Unexpected AND instruction emission failure.");
3992   } else {
3993     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3994     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3995             TII.get(TargetOpcode::COPY), ResultReg)
3996         .addReg(SrcReg, getKillRegState(SrcIsKill));
3997   }
3998 
3999   updateValueMap(I, ResultReg);
4000   return true;
4001 }
4002 
// Extend an i1 value in SrcReg to DestVT: zero-extension is an AND with 1,
// sign-extension is a signed bitfield move replicating bit 0.
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    // SBFM Wd, Wn, #0, #0 sign-extends bit 0 across the 32-bit register.
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            /*TODO:IsKill=*/false, 0, 0);
  }
}
4035 
// Emit a plain multiply as MADD with the zero register as the accumulator.
// Sub-i32 types are widened to i32. Returns 0 for unsupported types.
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
}
4055 
4056 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4057                                         unsigned Op1, bool Op1IsKill) {
4058   if (RetVT != MVT::i64)
4059     return 0;
4060 
4061   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4062                           Op0, Op0IsKill, Op1, Op1IsKill,
4063                           AArch64::XZR, /*IsKill=*/true);
4064 }
4065 
4066 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4067                                         unsigned Op1, bool Op1IsKill) {
4068   if (RetVT != MVT::i64)
4069     return 0;
4070 
4071   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4072                           Op0, Op0IsKill, Op1, Op1IsKill,
4073                           AArch64::XZR, /*IsKill=*/true);
4074 }
4075 
4076 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4077                                      unsigned Op1Reg, bool Op1IsKill) {
4078   unsigned Opc = 0;
4079   bool NeedTrunc = false;
4080   uint64_t Mask = 0;
4081   switch (RetVT.SimpleTy) {
4082   default: return 0;
4083   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4084   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4085   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4086   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4087   }
4088 
4089   const TargetRegisterClass *RC =
4090       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4091   if (NeedTrunc) {
4092     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4093     Op1IsKill = true;
4094   }
4095   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4096                                        Op1IsKill);
4097   if (NeedTrunc)
4098     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4099   return ResultReg;
4100 }
4101 
// Emit a left shift by a constant amount, folding the zero-/sign-extension
// from SrcVT to RetVT into a single {S|U}BFM bitfield-move instruction.
// Returns 0 if the shift cannot be emitted (e.g. undefined shift amounts).
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      // Still need the extension even though no shifting happens.
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // A 64-bit bitfield move of a 32-bit (or narrower) source: first place
    // the value in the low half of a 64-bit register via SUBREG_TO_REG.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
4181 
4182 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4183                                      unsigned Op1Reg, bool Op1IsKill) {
4184   unsigned Opc = 0;
4185   bool NeedTrunc = false;
4186   uint64_t Mask = 0;
4187   switch (RetVT.SimpleTy) {
4188   default: return 0;
4189   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4190   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4191   case MVT::i32: Opc = AArch64::LSRVWr; break;
4192   case MVT::i64: Opc = AArch64::LSRVXr; break;
4193   }
4194 
4195   const TargetRegisterClass *RC =
4196       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4197   if (NeedTrunc) {
4198     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4199     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4200     Op0IsKill = Op1IsKill = true;
4201   }
4202   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4203                                        Op1IsKill);
4204   if (NeedTrunc)
4205     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4206   return ResultReg;
4207 }
4208 
/// Emit a logical shift right by the immediate \p Shift, folding the
/// zero-/sign-extension of the (possibly narrower) \p SrcVT source value into
/// a single {S|U}BFM bitfield-move instruction where possible. Returns the
/// result register, or 0 if the shift cannot be handled here.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      // Types differ, so the extension still has to happen.
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting a zero-extended value by at least its width always yields zero,
  // so just materialize the constant 0.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    Op0IsKill = true;
    // From here on the operand is already the full-width value.
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  // Clamp r to the top bit of the source so the extract never reads past the
  // original value.
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit bitfield-move reads a 64-bit register; wrap the 32-bit source
    // with SUBREG_TO_REG (immediate 0 records the upper bits as zero).
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
4302 
4303 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4304                                      unsigned Op1Reg, bool Op1IsKill) {
4305   unsigned Opc = 0;
4306   bool NeedTrunc = false;
4307   uint64_t Mask = 0;
4308   switch (RetVT.SimpleTy) {
4309   default: return 0;
4310   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4311   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4312   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4313   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4314   }
4315 
4316   const TargetRegisterClass *RC =
4317       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4318   if (NeedTrunc) {
4319     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4320     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4321     Op0IsKill = Op1IsKill = true;
4322   }
4323   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4324                                        Op1IsKill);
4325   if (NeedTrunc)
4326     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4327   return ResultReg;
4328 }
4329 
/// Emit an arithmetic shift right by the immediate \p Shift, folding the
/// zero-/sign-extension of the (possibly narrower) \p SrcVT source value into
/// a single {S|U}BFM bitfield-move instruction. Unlike LShr, the sign-extend
/// folds directly because SBFM itself sign-extends. Returns the result
/// register, or 0 if the shift cannot be handled here.
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      // Types differ, so the extension still has to happen.
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting a zero-extended value by at least its width always yields zero,
  // so just materialize the constant 0.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // Clamp r to the top bit of the source so the extract never reads past the
  // original value.
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit bitfield-move reads a 64-bit register; wrap the 32-bit source
    // with SUBREG_TO_REG (immediate 0 records the upper bits as zero).
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
4411 
/// Emit a sign-/zero-extend of \p SrcReg from \p SrcVT to \p DestVT using a
/// single {S|U}BFM bitfield-move. Returns the result register, or 0 if the
/// type combination must be left to SelectionDAG.
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
       (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0; // ImmS operand: index of the source's top bit.

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    // i1 has a dedicated helper.
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    // The 64-bit {S|U}BFM reads a 64-bit register, so first wrap the 32-bit
    // source register with SUBREG_TO_REG.
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Emit {S|U}BFM Rd, Rn, #0, #Imm - extract bits [Imm:0] and extend.
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
}
4472 
4473 static bool isZExtLoad(const MachineInstr *LI) {
4474   switch (LI->getOpcode()) {
4475   default:
4476     return false;
4477   case AArch64::LDURBBi:
4478   case AArch64::LDURHHi:
4479   case AArch64::LDURWi:
4480   case AArch64::LDRBBui:
4481   case AArch64::LDRHHui:
4482   case AArch64::LDRWui:
4483   case AArch64::LDRBBroX:
4484   case AArch64::LDRHHroX:
4485   case AArch64::LDRWroX:
4486   case AArch64::LDRBBroW:
4487   case AArch64::LDRHHroW:
4488   case AArch64::LDRWroW:
4489     return true;
4490   }
4491 }
4492 
4493 static bool isSExtLoad(const MachineInstr *LI) {
4494   switch (LI->getOpcode()) {
4495   default:
4496     return false;
4497   case AArch64::LDURSBWi:
4498   case AArch64::LDURSHWi:
4499   case AArch64::LDURSBXi:
4500   case AArch64::LDURSHXi:
4501   case AArch64::LDURSWi:
4502   case AArch64::LDRSBWui:
4503   case AArch64::LDRSHWui:
4504   case AArch64::LDRSBXui:
4505   case AArch64::LDRSHXui:
4506   case AArch64::LDRSWui:
4507   case AArch64::LDRSBWroX:
4508   case AArch64::LDRSHWroX:
4509   case AArch64::LDRSBXroX:
4510   case AArch64::LDRSHXroX:
4511   case AArch64::LDRSWroX:
4512   case AArch64::LDRSBWroW:
4513   case AArch64::LDRSHWroW:
4514   case AArch64::LDRSBXroW:
4515   case AArch64::LDRSHXroW:
4516   case AArch64::LDRSWroW:
4517     return true;
4518   }
4519 }
4520 
/// Try to fold the sign-/zero-extend \p I into the load instruction that
/// defines its operand. On success the value map for \p I is updated to reuse
/// (or cheaply widen) the load's register and true is returned; no new extend
/// instruction is emitted.
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  // The load must feed only this extend; otherwise other users would observe
  // the changed register mapping.
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  unsigned Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  // The load may have been selected as a wide load followed by a COPY of the
  // low 32 bits; look through that copy to find the actual load.
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    // Widen the 32-bit result to 64 bits. SUBREG_TO_REG with immediate 0
    // records that the upper 32 bits are already zero.
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    // The sign-extending load already produced the full 64-bit value; reuse
    // its register directly and delete the now-dead truncating COPY.
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}
4574 
/// Select a zext/sext instruction, first trying to fold the extension into a
/// preceding load or to reuse a function argument that the ABI attributes as
/// already extended.
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    // The argument carries a matching zeroext/signext attribute, so the value
    // is already extended; at most a 32->64-bit register widening is needed.
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg, getKillRegState(SrcIsKill))
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      // Conservatively clear all kill flags from all uses, because we are
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
      // level. The result of the instruction at IR level might have been
      // trivially dead, which is now no longer true.
      unsigned UseReg = lookUpRegForValue(I);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  // Fall back to an explicit extend instruction.
  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4628 
4629 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4630   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4631   if (!DestEVT.isSimple())
4632     return false;
4633 
4634   MVT DestVT = DestEVT.getSimpleVT();
4635   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4636     return false;
4637 
4638   unsigned DivOpc;
4639   bool Is64bit = (DestVT == MVT::i64);
4640   switch (ISDOpcode) {
4641   default:
4642     return false;
4643   case ISD::SREM:
4644     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4645     break;
4646   case ISD::UREM:
4647     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4648     break;
4649   }
4650   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4651   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4652   if (!Src0Reg)
4653     return false;
4654   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4655 
4656   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4657   if (!Src1Reg)
4658     return false;
4659   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4660 
4661   const TargetRegisterClass *RC =
4662       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4663   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4664                                      Src1Reg, /*IsKill=*/false);
4665   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4666   // The remainder is computed as numerator - (quotient * denominator) using the
4667   // MSUB instruction.
4668   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4669                                         Src1Reg, Src1IsKill, Src0Reg,
4670                                         Src0IsKill);
4671   updateValueMap(I, ResultReg);
4672   return true;
4673 }
4674 
4675 bool AArch64FastISel::selectMul(const Instruction *I) {
4676   MVT VT;
4677   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4678     return false;
4679 
4680   if (VT.isVector())
4681     return selectBinaryOp(I, ISD::MUL);
4682 
4683   const Value *Src0 = I->getOperand(0);
4684   const Value *Src1 = I->getOperand(1);
4685   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4686     if (C->getValue().isPowerOf2())
4687       std::swap(Src0, Src1);
4688 
4689   // Try to simplify to a shift instruction.
4690   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4691     if (C->getValue().isPowerOf2()) {
4692       uint64_t ShiftVal = C->getValue().logBase2();
4693       MVT SrcVT = VT;
4694       bool IsZExt = true;
4695       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4696         if (!isIntExtFree(ZExt)) {
4697           MVT VT;
4698           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4699             SrcVT = VT;
4700             IsZExt = true;
4701             Src0 = ZExt->getOperand(0);
4702           }
4703         }
4704       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4705         if (!isIntExtFree(SExt)) {
4706           MVT VT;
4707           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4708             SrcVT = VT;
4709             IsZExt = false;
4710             Src0 = SExt->getOperand(0);
4711           }
4712         }
4713       }
4714 
4715       unsigned Src0Reg = getRegForValue(Src0);
4716       if (!Src0Reg)
4717         return false;
4718       bool Src0IsKill = hasTrivialKill(Src0);
4719 
4720       unsigned ResultReg =
4721           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4722 
4723       if (ResultReg) {
4724         updateValueMap(I, ResultReg);
4725         return true;
4726       }
4727     }
4728 
4729   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4730   if (!Src0Reg)
4731     return false;
4732   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4733 
4734   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4735   if (!Src1Reg)
4736     return false;
4737   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4738 
4739   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4740 
4741   if (!ResultReg)
4742     return false;
4743 
4744   updateValueMap(I, ResultReg);
4745   return true;
4746 }
4747 
/// Select a shl/lshr/ashr instruction. For immediate shift amounts the
/// zero-/sign-extension of a narrower operand can be folded into a single
/// bitfield-move; otherwise the register-shift variants are emitted.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  // Vector shifts are handled by the generic operator selection.
  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // ashr needs a sign-extended operand; shl/lshr a zero-extended one.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          // Fold the zero-extend by shifting the narrower source instead.
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          // Likewise for a sign-extend.
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Variable shift amount: use the register-shift instruction variants.
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4836 
4837 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4838   MVT RetVT, SrcVT;
4839 
4840   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4841     return false;
4842   if (!isTypeLegal(I->getType(), RetVT))
4843     return false;
4844 
4845   unsigned Opc;
4846   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4847     Opc = AArch64::FMOVWSr;
4848   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4849     Opc = AArch64::FMOVXDr;
4850   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4851     Opc = AArch64::FMOVSWr;
4852   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4853     Opc = AArch64::FMOVDXr;
4854   else
4855     return false;
4856 
4857   const TargetRegisterClass *RC = nullptr;
4858   switch (RetVT.SimpleTy) {
4859   default: llvm_unreachable("Unexpected value type.");
4860   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4861   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4862   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4863   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4864   }
4865   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4866   if (!Op0Reg)
4867     return false;
4868   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4869   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4870 
4871   if (!ResultReg)
4872     return false;
4873 
4874   updateValueMap(I, ResultReg);
4875   return true;
4876 }
4877 
4878 bool AArch64FastISel::selectFRem(const Instruction *I) {
4879   MVT RetVT;
4880   if (!isTypeLegal(I->getType(), RetVT))
4881     return false;
4882 
4883   RTLIB::Libcall LC;
4884   switch (RetVT.SimpleTy) {
4885   default:
4886     return false;
4887   case MVT::f32:
4888     LC = RTLIB::REM_F32;
4889     break;
4890   case MVT::f64:
4891     LC = RTLIB::REM_F64;
4892     break;
4893   }
4894 
4895   ArgListTy Args;
4896   Args.reserve(I->getNumOperands());
4897 
4898   // Populate the argument list.
4899   for (auto &Arg : I->operands()) {
4900     ArgListEntry Entry;
4901     Entry.Val = Arg;
4902     Entry.Ty = Arg->getType();
4903     Args.push_back(Entry);
4904   }
4905 
4906   CallLoweringInfo CLI;
4907   MCContext &Ctx = MF->getContext();
4908   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4909                 TLI.getLibcallName(LC), std::move(Args));
4910   if (!lowerCallTo(CLI))
4911     return false;
4912   updateValueMap(I, CLI.ResultReg);
4913   return true;
4914 }
4915 
/// Select an sdiv instruction. A divide by a (positive or negative) power of
/// two is lowered to a shift/add/csel sequence; everything else goes through
/// the generic binary-op path.
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  // Only i32/i64 divides by a non-zero +/- power of two are optimized here.
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  // An exact divide needs no round-towards-zero adjustment - a plain
  // arithmetic shift suffices.
  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  // Compute Src0 + (Pow2 - 1), the value to shift when Src0 is negative.
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  // Select the adjusted value for negative dividends, the original otherwise.
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    // ResultReg = 0 - (SelectReg ASR Lg2): shift and negate in one SUB.
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4983 
4984 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4985 /// have to duplicate it for AArch64, because otherwise we would fail during the
4986 /// sign-extend emission.
4987 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4988   unsigned IdxN = getRegForValue(Idx);
4989   if (IdxN == 0)
4990     // Unhandled operand. Halt "fast" selection and bail.
4991     return std::pair<unsigned, bool>(0, false);
4992 
4993   bool IdxNIsKill = hasTrivialKill(Idx);
4994 
4995   // If the index is smaller or larger than intptr_t, truncate or extend it.
4996   MVT PtrVT = TLI.getPointerTy(DL);
4997   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4998   if (IdxVT.bitsLT(PtrVT)) {
4999     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
5000     IdxNIsKill = true;
5001   } else if (IdxVT.bitsGT(PtrVT))
5002     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
5003   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
5004 }
5005 
5006 /// This is mostly a copy of the existing FastISel GEP code, but we have to
5007 /// duplicate it for AArch64, because otherwise we would bail out even for
5008 /// simple cases. This is because the standard fastEmit functions don't cover
5009 /// MUL at all and ADD is lowered very inefficientily.
5010 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
5011   if (Subtarget->isTargetILP32())
5012     return false;
5013 
5014   unsigned N = getRegForValue(I->getOperand(0));
5015   if (!N)
5016     return false;
5017   bool NIsKill = hasTrivialKill(I->getOperand(0));
5018 
5019   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
5020   // into a single N = N + TotalOffset.
5021   uint64_t TotalOffs = 0;
5022   MVT VT = TLI.getPointerTy(DL);
5023   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
5024        GTI != E; ++GTI) {
5025     const Value *Idx = GTI.getOperand();
5026     if (auto *StTy = GTI.getStructTypeOrNull()) {
5027       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
5028       // N = N + Offset
5029       if (Field)
5030         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5031     } else {
5032       Type *Ty = GTI.getIndexedType();
5033 
5034       // If this is a constant subscript, handle it quickly.
5035       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5036         if (CI->isZero())
5037           continue;
5038         // N = N + Offset
5039         TotalOffs +=
5040             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
5041         continue;
5042       }
5043       if (TotalOffs) {
5044         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5045         if (!N)
5046           return false;
5047         NIsKill = true;
5048         TotalOffs = 0;
5049       }
5050 
5051       // N = N + Idx * ElementSize;
5052       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5053       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5054       unsigned IdxN = Pair.first;
5055       bool IdxNIsKill = Pair.second;
5056       if (!IdxN)
5057         return false;
5058 
5059       if (ElementSize != 1) {
5060         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5061         if (!C)
5062           return false;
5063         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5064         if (!IdxN)
5065           return false;
5066         IdxNIsKill = true;
5067       }
5068       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5069       if (!N)
5070         return false;
5071     }
5072   }
5073   if (TotalOffs) {
5074     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5075     if (!N)
5076       return false;
5077   }
5078   updateValueMap(I, N);
5079   return true;
5080 }
5081 
// Lower a cmpxchg instruction that survived AtomicExpand (only possible at
// -O0). Emits a CMP_SWAP pseudo for the exchange itself, then a compare of
// the loaded value against the expected value and a CSINC to materialize the
// i1 success flag, matching the {value, success} struct result of cmpxchg.
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOpt::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  // cmpxchg returns a pair: the loaded value and an i1 success flag.
  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  // Constrain each input to the register class the CMP_SWAP pseudo expects
  // (operand indices are offset past the pseudo's defs).
  const unsigned AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const unsigned DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const unsigned NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  // ResultReg1/ResultReg2 must be created back-to-back: updateValueMap below
  // hands out consecutive registers for the two struct results (see assert).
  const unsigned ResultReg1 = createResultReg(ResRC);
  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  // Compare the value actually loaded with the expected value to set NZCV.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  // Materialize the success bit: CSINC WZR, WZR, NE yields 1 on equality
  // (condition NE selects the increment path when the compare was equal).
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}
5148 
5149 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5150   switch (I->getOpcode()) {
5151   default:
5152     break;
5153   case Instruction::Add:
5154   case Instruction::Sub:
5155     return selectAddSub(I);
5156   case Instruction::Mul:
5157     return selectMul(I);
5158   case Instruction::SDiv:
5159     return selectSDiv(I);
5160   case Instruction::SRem:
5161     if (!selectBinaryOp(I, ISD::SREM))
5162       return selectRem(I, ISD::SREM);
5163     return true;
5164   case Instruction::URem:
5165     if (!selectBinaryOp(I, ISD::UREM))
5166       return selectRem(I, ISD::UREM);
5167     return true;
5168   case Instruction::Shl:
5169   case Instruction::LShr:
5170   case Instruction::AShr:
5171     return selectShift(I);
5172   case Instruction::And:
5173   case Instruction::Or:
5174   case Instruction::Xor:
5175     return selectLogicalOp(I);
5176   case Instruction::Br:
5177     return selectBranch(I);
5178   case Instruction::IndirectBr:
5179     return selectIndirectBr(I);
5180   case Instruction::BitCast:
5181     if (!FastISel::selectBitCast(I))
5182       return selectBitCast(I);
5183     return true;
5184   case Instruction::FPToSI:
5185     if (!selectCast(I, ISD::FP_TO_SINT))
5186       return selectFPToInt(I, /*Signed=*/true);
5187     return true;
5188   case Instruction::FPToUI:
5189     return selectFPToInt(I, /*Signed=*/false);
5190   case Instruction::ZExt:
5191   case Instruction::SExt:
5192     return selectIntExt(I);
5193   case Instruction::Trunc:
5194     if (!selectCast(I, ISD::TRUNCATE))
5195       return selectTrunc(I);
5196     return true;
5197   case Instruction::FPExt:
5198     return selectFPExt(I);
5199   case Instruction::FPTrunc:
5200     return selectFPTrunc(I);
5201   case Instruction::SIToFP:
5202     if (!selectCast(I, ISD::SINT_TO_FP))
5203       return selectIntToFP(I, /*Signed=*/true);
5204     return true;
5205   case Instruction::UIToFP:
5206     return selectIntToFP(I, /*Signed=*/false);
5207   case Instruction::Load:
5208     return selectLoad(I);
5209   case Instruction::Store:
5210     return selectStore(I);
5211   case Instruction::FCmp:
5212   case Instruction::ICmp:
5213     return selectCmp(I);
5214   case Instruction::Select:
5215     return selectSelect(I);
5216   case Instruction::Ret:
5217     return selectRet(I);
5218   case Instruction::FRem:
5219     return selectFRem(I);
5220   case Instruction::GetElementPtr:
5221     return selectGetElementPtr(I);
5222   case Instruction::AtomicCmpXchg:
5223     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5224   }
5225 
5226   // fall-back to target-independent instruction selection.
5227   return selectOperator(I, I->getOpcode());
5228 }
5229 
5230 namespace llvm {
5231 
5232 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5233                                         const TargetLibraryInfo *LibInfo) {
5234   return new AArch64FastISel(FuncInfo, LibInfo);
5235 }
5236 
5237 } // end namespace llvm
5238